Merge pull request #445 from Hestia-Homes/sfr

Sfr
This commit is contained in:
KhalimCK 2025-07-14 10:40:35 +01:00 committed by GitHub
commit 905a7295b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
42 changed files with 3065 additions and 357 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -301,6 +301,14 @@ class AssetList:
"Potential unsafe environment", "Date of Inspection", "Borescoped?"
]
# Another version of non-intrusives:
NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 = [
'Archetype', 'Archetype 2', 'Construction', 'Insulated', 'Material', 'Boroscoped?',
'CIGA Check Required', 'ROOF ORIENTATION', 'TILE HUNG', 'RENDERED',
'CLADDING', 'ACCESS ISSUES', 'FURTHER SURVEYOR NOTES', 'DATE',
'NAME OF SURVEYOR'
]
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
@ -442,6 +450,8 @@ class AssetList:
self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns
# We detect if we have the old format of non-intrusives
self.old_format_non_intrusives_present = "WFT Findings" in self.raw_asset_list.columns
if self.old_format_non_intrusives_present:
self.non_intrusives_present = False
self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns
@ -449,6 +459,8 @@ class AssetList:
"Has the property been re-walled?" in self.raw_asset_list.columns
)
self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns
# Names of columns
self.landlord_property_id = landlord_property_id
self.address1_colname = address1_colname
@ -750,7 +762,7 @@ class AssetList:
self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}
non_intrusive_columns = []
if self.non_intrusives_present:
if self.non_intrusives_present and not self.new_format_non_insturives_present_v2:
non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES
if self.non_intrusives_eligibility:
@ -759,6 +771,9 @@ class AssetList:
if self.new_format_non_insturives_present:
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES
if self.new_format_non_insturives_present_v2:
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2
if self.old_format_non_intrusives_present:
# We check if we have the ECO Eligibility column, which we might not have
non_intrusive_columns = [
@ -827,52 +842,44 @@ class AssetList:
# We attempt to convert the year built to a datetime, by detecting the format and converting
def extract_year(date_str):
    """
    Extract a build year from a raw "year built" value.

    The value is checked in this order:

    1. Nulls and known placeholder values (including 0) return None.
    2. ``datetime`` instances return their ``.year``.
    3. Numeric values (built-in or NumPy scalars) are returned as an int when they lie
       between 1000 and 2100 inclusive.
    4. Strings:
       a. a leading 'DD-Mon-YYYY' date (e.g. '01-Jul-2021') returns YYYY;
       b. otherwise the most recent standalone 19xx/20xx year in the string is returned,
          which covers ranges such as 'G: 1983-1990, H: 1991-1995';
       c. otherwise, if the digits of the string form exactly four characters, they are
          returned as the year.

    :param date_str: raw value (str, number, datetime or null)
    :return: the year as an int, or None when the value is null or a known placeholder
    :raises NotImplementedError: when the value matches none of the handled formats
    """
    known_errors = {
        "#MULTIVALUE",
        "ND",
        "PIMSS EMPTY",
        "UNKNOWN",
        "This cell has an external reference that can't be shown or edited. Editing this cell will "
        "remove the external reference.",
        0
    }

    if pd.isnull(date_str) or date_str in known_errors:
        return None

    # Handle datetime
    if isinstance(date_str, datetime):
        return date_str.year

    # Handle numeric year. NumPy integer scalars are not subclasses of int, so they are
    # listed explicitly; otherwise they would fall through to the NotImplementedError below.
    if isinstance(date_str, (int, float, np.integer, np.floating)):
        if 1000 <= int(date_str) <= 2100:
            return int(date_str)

    # Now handle string-based logic
    if isinstance(date_str, str):
        # Direct date match e.g. 01-Jul-2021
        match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str)
        if match:
            return int(match.group(1))

        # Find all 4-digit years in string
        years = [int(y) for y in re.findall(r"\b(?:19|20)\d{2}\b", date_str)]
        if years:
            return max(years)  # Return most recent year

        # If only numbers are present without format
        numeric_str = re.sub(r"\D", "", date_str)
        if len(numeric_str) == 4 and numeric_str.isdigit():
            return int(numeric_str)

    raise NotImplementedError(f"Unhandled format for year built, value is {date_str} - implement me")
@ -1104,7 +1111,7 @@ class AssetList:
num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
floor_height=(
float(x[self.EPC_API_DATA_NAMES["floor-height"]]) if
x[self.EPC_API_DATA_NAMES["floor-height"]] else 2.5
not pd.isnull(x[self.EPC_API_DATA_NAMES["floor-height"]]) else 2.5
),
perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER],
built_form=x[self.EPC_API_DATA_NAMES["built-form"]]
@ -1315,10 +1322,16 @@ class AssetList:
# Before we begin, we identify if a property has solar already as we use this
# for identifying cavity jobs
if self.non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
)
if self.non_intrusives_present and not self.old_format_non_intrusives_present:
if self.new_format_non_insturives_present_v2:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"] == "ALREADY HAS SOLAR PV"
)
else:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF"
)
elif self.old_format_non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin(
@ -1557,7 +1570,7 @@ class AssetList:
) & (
~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
["district heating", "communal heating", "communal gas boiler"]
) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].str.contains("gas ")
) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].astype(str).str.contains("gas ")
)
)
@ -1596,12 +1609,17 @@ class AssetList:
# With this in mind, we look for 2 classes
# 1) The property is fully insulated apart from the loft (<200mm insulation)
# 2) The property is fully insulated
print("Should we include cavity properties where they might be uninsulated?")
self.standardised_asset_list["solar_landlord_walls_insulated"] = (
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(
[
"filled cavity", "insulated solid brick", "insulated timber frame",
"filled cavity",
"insulated solid brick",
"insulated timber frame",
"uninsulated cavity",
"insulated system built",
"insulated granite or whinstone",
"insulated sandstone or limestone",
"new build - average thermal transmittance"
]
)
)
@ -1999,24 +2017,6 @@ class AssetList:
self.standardised_asset_list[col]
)
blocks_of_flats = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
]
non_blocks_of_flats = self.standardised_asset_list[
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
]
# Produce some aggregate figures
self.work_type_figures = {
**non_blocks_of_flats["cavity_reason"].value_counts().to_dict(),
**{
k + " (Block of flats)": v for k, v in
blocks_of_flats["solar_reason"].value_counts().to_dict().items()
},
**self.standardised_asset_list["solar_reason"].value_counts().to_dict()
}
# We prepare outcomes for output
if self.outcomes is not None:
logger.info("Preparing outcomes for output")
@ -2047,6 +2047,26 @@ class AssetList:
)
)
def get_work_figures(self):
    """
    Build aggregate counts of work types, store them on ``self.work_type_figures``,
    pretty-print them and return them.

    The figures are a single dict merged from three sources, in this order:

    1. ``cavity_reason`` value counts for rows that are not blocks of flats;
    2. ``solar_reason`` value counts for blocks of flats, with " (Block of flats)"
       appended to each key;
    3. ``solar_reason`` value counts across the whole standardised asset list.

    Later sources override earlier ones on a key clash, so a label that appears in both
    ``cavity_reason`` and ``solar_reason`` ends up with the solar count.

    :return: dict mapping reason label to count (the same object stored on the instance)
    """
    # NOTE(review): blocks of flats are counted by solar_reason rather than cavity_reason -
    # confirm this is intended.
    blocks_of_flats = self.standardised_asset_list[
        self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
    ]
    non_blocks_of_flats = self.standardised_asset_list[
        self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
    ]

    # Produce some aggregate figures
    self.work_type_figures = {
        **non_blocks_of_flats["cavity_reason"].value_counts().to_dict(),
        **{
            k + " (Block of flats)": v for k, v in
            blocks_of_flats["solar_reason"].value_counts().to_dict().items()
        },
        **self.standardised_asset_list["solar_reason"].value_counts().to_dict()
    }

    pprint(self.work_type_figures)

    # Returning the figures lets callers use the result directly instead of reading the attribute
    return self.work_type_figures
def fill_landlord_block_reference(self, has_blocks_of_flats):
if not has_blocks_of_flats:
return
@ -2082,15 +2102,40 @@ class AssetList:
for _, row in blocks.iterrows():
addr = str(row[self.STANDARD_ADDRESS_1])
full_addr = row[self.STANDARD_FULL_ADDRESS]
# We also look for terms like "Odd", "even", "all" in the address to indicate if it should be just
# the odds, evens or all of the numbers
has_odd = (
"(odd)" in addr.lower() or
"(odd)" in full_addr.lower() or
"(odds)" in addr.lower() or
"(odds)" in full_addr.lower()
)
has_even = (
"(even)" in addr.lower() or
"(even)" in full_addr.lower() or
"(evens)" in addr.lower() or
"(evens)" in full_addr.lower()
)
# 1 ─ Range (e.g. 1-7)
m_range = RANGE_RE.search(addr)
if m_range:
start, end = m_range.groups()
start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0])
if start > end or (end - start) > 100:
raise ValueError(f"Suspicious range '{addr}'")
for n in range(start, end + 1):
# We define the looping range on whether we have odd, even or all numbers
house_number_range = range(start, end + 1)
if has_odd:
house_number_range = [x for x in house_number_range if x % 2 != 0]
if has_even:
house_number_range = [x for x in house_number_range if x % 2 == 0]
for n in house_number_range:
new = row.copy()
new_addr = RANGE_RE.sub(str(n), addr, count=1)
original_full_address = new[self.STANDARD_FULL_ADDRESS]
@ -2108,9 +2153,9 @@ class AssetList:
expanded_rows.append(new)
continue
# 2 ─ Explicit list (e.g. 1, 2, 5 Block)
# 2 ─ Explicit list (e.g. 1, 2, 5 Block) or split by an ampersand (e.g. 1 & 2 Block)
nums = NUM_RE.findall(addr)
if len(nums) > 1 and ',' in addr:
if len(nums) > 1 and (',' in addr or '&' in addr):
for n in nums:
new = row.copy()
new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only
@ -2320,7 +2365,7 @@ class AssetList:
self.standardised_asset_list["cavity_reason"] = np.where(
self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks),
self.standardised_asset_list["cavity_reason"]
+ " " + "(Flat in block with more than 50% eligible, but not eligible itself)",
+ " " + "(Flat in block with more than 50% eligible)",
self.standardised_asset_list["cavity_reason"]
)
@ -2375,6 +2420,11 @@ class AssetList:
none_details = [x for x in details_colnames if x is None]
details_colnames = [x for x in details_colnames if x is not None]
if local_filepath is None:
# Create an empty DataFrame based on the fields in self.contact_detail_fields
self.contact_details = pd.DataFrame(columns=list(self.contact_detail_fields.keys()))
return
contact_details = pd.read_excel(
local_filepath, sheet_name=sheet_name
)[[self.contact_detail_fields["landlord_property_id"]] + details_colnames]
@ -2486,10 +2536,14 @@ class AssetList:
if reconcile_programme:
programme_data = programme_data[~pd.isnull(programme_data["project_code"])]
else:
if programme_data["hubspot_status"].nunique() > 1:
logger.info("Multiple hubspot_status found - are you sure you don't want to reconcile the programme?")
ready_to_be_scheduled = (
(
programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
) & (~pd.isnull(programme_data["survey_date"]))
)
)
# completed_works = (
# (programme_data["hubspot_status"] !=
@ -2540,13 +2594,13 @@ class AssetList:
)
else:
# We shouldn't have any missing products
programme_data = programme_data[
~pd.isnull(programme_data["survey_date"])
]
# programme_data = programme_data[
# ~pd.isnull(programme_data["survey_date"])
# ]
if pd.isnull(programme_data["domna_product"]).sum():
raise ValueError("Missing products")
programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
product_df = (
pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]]
@ -2587,6 +2641,13 @@ class AssetList:
programme_data[self.EPC_API_DATA_NAMES["uprn"]]
)
# Remove any negative UPRNs, which are not valid
programme_data[uprn_column] = np.where(
programme_data["estimated"].isin([1, True]),
None,
programme_data[uprn_column]
)
# Add in some columns if we have them
date_of_inspections = (
"Non-Intrusives: Date of Inspection" if
@ -2753,6 +2814,7 @@ class AssetList:
columns={v: k for k, v in schema_mappings.items() if v is not None}
)
programme_data['Postcode <DEAL postcode>'] = programme_data['Postcode <LISTING hs_zip>'].copy()
programme_data['Installer <DEAL installer>'] = installer_name
programme_data['Name <LISTING hs_name>'] = (
programme_data['Full Address <LISTING full_address>'] + " ," + programme_data['Postcode <LISTING hs_zip>']
@ -2951,7 +3013,7 @@ class AssetList:
outcomes["row_id"] = outcomes.index
if outcomes_houseno[idx] is None:
outcomes_houseno = "houseno"
outcomes_houseno[idx] = "houseno"
outcomes["houseno"] = outcomes[outcomes_address[idx]].apply(
lambda x: SearchEpc.get_house_number(x, outcomes[outcomes_postcode])
)
@ -3219,12 +3281,21 @@ class AssetList:
install_col = "INSTALL / CANCELLATION DATE"
elif 'INSTALL/ CANCELLATION DATE' in master_data.columns:
install_col = 'INSTALL/ CANCELLATION DATE'
elif "INSTALL/CANCELLATION DATE" in master_data.columns:
install_col = "INSTALL/CANCELLATION DATE"
elif 'Measure 1 Install Date' in master_data.columns:
install_col = 'Measure 1 Install Date'
else:
raise ValueError("No install or cancellation date")
submission_col = (
"SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
)
if "SUBMISSION DATE" in master_data.columns:
submission_col = "SUBMISSION DATE"
elif "SUBMISSION DATE TO INSTALLERS" in master_data.columns:
submission_col = "SUBMISSION DATE TO INSTALLERS"
elif "Submission Date" in master_data.columns:
submission_col = "Submission Date"
else:
raise ValueError("No submission date column found in master data")
master_data["row_id"] = master_data.index
@ -3239,6 +3310,10 @@ class AssetList:
scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION"
elif "AFFORDABLE WARMTH" in master_data.columns:
scheme_col = "AFFORDABLE WARMTH"
elif "Scheme" in master_data.columns:
scheme_col = "Scheme"
elif "Affordable Warmth" in master_data.columns:
scheme_col = "Affordable Warmth"
else:
scheme_col = "OFFICE USE ONLY"
@ -3254,12 +3329,30 @@ class AssetList:
property_type_col = "PROPERTY TYPE As per table emailed"
elif "PROPERTY TYPE As per table emailed" in master_data.columns:
property_type_col = "PROPERTY TYPE As per table emailed"
elif "PROPERTY TYPE" in master_data.columns:
property_type_col = "PROPERTY TYPE"
else:
property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 3W_Flat_1 (As per Matrix)"
if "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" in master_data.columns:
installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
elif "INSTALLERS NOTES" in master_data.columns:
installer_notes_col = "INSTALLERS NOTES"
elif 'Installers Notes' in master_data.columns:
installer_notes_col = 'Installers Notes'
elif 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' in master_data.columns:
installer_notes_col = 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM'
else:
raise ValueError("No installer notes column found in master data")
if "INSTALLER" in master_data.columns:
installer_col = "INSTALLER"
elif "Installer" in master_data.columns:
installer_col = "Installer"
else:
raise ValueError("No installer column found in master data")
measure_mix_col = "MEASURE COMBO"
installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
installer_col = "INSTALLER"
town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area'
logger.info("Matching master data to asset list")
@ -3301,6 +3394,10 @@ class AssetList:
]
house_no = row[house_no_col]
if pd.isnull(house_no):
house_no = None
if isinstance(house_no, (float, int)):
house_no = str(int(house_no))
@ -3401,6 +3498,9 @@ class AssetList:
master_data[measure_mix_col] = "Measure mix not recorded"
matched = pd.DataFrame(matched)
if matched.empty:
continue
master_to_append = master_data[
[scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col]
].merge(

229
asset_list/abs_estimates.py Normal file
View file

@ -0,0 +1,229 @@
"""
Simple script that takes a standardised asset list and calculates the ABS for each property. We'll use this
code to estimate the ABS for properties going forward.
"""
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
from backend.Funding import Funding
from backend.app.utils import sap_to_epc
# The EPC API token is read from the backend environment file
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# Inputs: the standardised asset list and the ECO4 full / partial project score matrices
asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Livewest South-West - Standardised V2.xlsx",
    sheet_name="Cavity Route (Insta Review)",
)
abs_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv")
pps_matrix = pd.read_excel(
    "/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores Matrix v5.xlsx",
    header=1,
)
# Column headers in the partial project matrix carry stray whitespace
pps_matrix.columns = [column_name.strip() for column_name in pps_matrix.columns]

# The SAP points the work will produce (and so the finishing band) are estimated from the
# non-invasive recommendations returned for each property.
# NOTE(review): "upr" may be a typo for "uprn" - confirm against what AssetListEpcData expects.
epc_lookup_columns = {"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "upr"}
route = asset_list[list(epc_lookup_columns)].rename(columns=epc_lookup_columns)
route["address"] = route["address"].astype(str)

asset_list_epc_client = AssetListEpcData(asset_list=route, epc_auth_token=EPC_AUTH_TOKEN)
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()

# Keep the SAP points of the first solar PV recommendation found for each property
solar_rows = []
for property_result in asset_list_epc_client.non_invasive_recommendations:
    if not property_result.get("recommendations"):
        continue

    solar_matches = [rec for rec in property_result["recommendations"] if "solar_pv" in rec["type"]]
    if not solar_matches:
        continue

    first_solar = solar_matches[0]
    solar_rows.append(
        {
            "address": property_result["address"],
            "postcode": property_result["postcode"],
            "sap_points": first_solar["sap_points"],
        }
    )

solar_sap_points = pd.DataFrame(solar_rows)
solar_sap_points = solar_sap_points.drop_duplicates(subset=["address", "postcode"])

# Fallback values for properties without a solar recommendation of their own:
# first the postcode mean, then the overall median
avg_solar_points_by_postcode = solar_sap_points.groupby(["postcode"])["sap_points"].mean().reset_index()
avg_solar_points = solar_sap_points["sap_points"].median()

asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
with_property_points = asset_list.merge(
    solar_sap_points,
    how="left",
    left_on=["domna_address_1", "domna_postcode"],
    right_on=["address", "postcode"],
)
asset_list = with_property_points.drop(columns=["address", "postcode"])

# Fill missing SAP points with the postcode average, then the overall median
postcode_fallback = avg_solar_points_by_postcode.rename(columns={"postcode": "domna_postcode"})
asset_list = asset_list.merge(postcode_fallback, how="left", on=["domna_postcode"], suffixes=("", "_avg"))
asset_list["sap_points"] = (
    asset_list["sap_points"].fillna(asset_list["sap_points_avg"]).fillna(avg_solar_points)
)
asset_list = asset_list.drop(columns=["sap_points_avg"])

# Post-works SAP / EPC and the bands used to look up the full project score
asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(sap_to_epc)

asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(Funding.get_sap_band)
asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(Funding.get_sap_band)
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(Funding.get_floor_area_band)

# Realistically, we'll take the properties to a low C at worst: this applies both when the
# EPC rating would not change and when the post-works SAP is still below 69
epc_unchanged = asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]
below_low_c = asset_list["post_works_sap"] < 69
asset_list["ending_half_band"] = np.where(
    epc_unchanged | below_low_c,
    "Low_C",
    asset_list["ending_half_band"],
)

# Look up the full project score for each starting band / finishing band / floor area combination
asset_list = asset_list.merge(
    abs_matrix,
    how="left",
    left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
    right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
)
asset_list = asset_list.drop(columns=["Starting Band", "Finishing Band", "Floor Area Segment"])
asset_list = asset_list.rename(columns={"Cost Savings": "funding_abs"})

# Number of repeated property ids after the merges
print(asset_list["domna_property_id"].duplicated().sum())

# Store this data
asset_list.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_solar_abs_estimates-solar.csv",
    index=False,
)
# Cavity process!
# Keep the SAP points of the first cavity wall insulation recommendation found for each property.
# cwi_sap_points must be built here: everything below reads it.
cwi_sap_points = []
for r in asset_list_epc_client.non_invasive_recommendations:
    if not r.get("recommendations"):
        continue

    cwi_recommendations = [
        x for x in r["recommendations"] if "cavity_wall_insulation" in x["type"]
    ]
    if not cwi_recommendations:
        continue

    cwi_sap_points.append(
        {
            "address": r["address"],
            "postcode": r["postcode"],
            "sap_points": cwi_recommendations[0]["sap_points"]
        }
    )

cwi_sap_points = pd.DataFrame(cwi_sap_points)
# NOTE: a previously stored copy can be loaded instead of rebuilding the frame above:
# cwi_sap_points = pd.read_csv(
#     "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv"
# )
cwi_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True)

# Fallback values for properties without a cavity recommendation of their own:
# first the postcode mean, then the overall median
avg_cwi_points_by_postcode = cwi_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
avg_cwi_points = cwi_sap_points["sap_points"].median()

# The solar estimate earlier in this script leaves its own "sap_points" column on asset_list.
# Drop it so the merges below create "sap_points" / "sap_points_avg" instead of suffixed
# "_x" / "_y" duplicates (which made the fillna below fail with a KeyError).
asset_list = asset_list.drop(columns=["sap_points"], errors="ignore")

# The merge keys must be strings on both sides (idempotent if already converted)
asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
asset_list = asset_list.merge(
    cwi_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"]
).drop(
    columns=["address", "postcode"]
)

# Fill the sap points with the average cwi points
asset_list = asset_list.merge(
    avg_cwi_points_by_postcode.rename(columns={"postcode": "domna_postcode"}),
    how="left", on=["domna_postcode"], suffixes=("", "_avg")
)
asset_list["sap_points"] = asset_list["sap_points"].fillna(asset_list["sap_points_avg"])
asset_list.drop(columns=["sap_points_avg"], inplace=True)
asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_cwi_points)

# Post-works SAP / EPC and the bands used to look up the project scores
asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x))

asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x))
asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x))
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x))

# Properties whose EPC rating does not change are labelled GBIS; the rest are labelled ECO4
asset_list["funding_scheme"] = np.where(
    (
        (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"])
    ),
    "GBIS",
    "ECO4"
)

# Full project score for each starting band / finishing band / floor area combination
asset_list = asset_list.merge(
    abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
    right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ]
)
asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment'])

# Using CWI solid 1.7 -> 0.3 rates
# NOTE(review): the filter below selects Measure_Type "CWI_0.033" - confirm the comment above still applies.
cwi_pps_matrix = pps_matrix[
    pps_matrix["Measure_Type"].isin(["CWI_0.033"])
]
# Merge on
asset_list = asset_list.merge(
    cwi_pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename(
        columns={
            "Cost Savings": "partial_project_score",
            "Starting Band": "starting_half_band",
            "Total Floor Area Band": "floor_area_band"
        }
    ),
    how="left",
    on=["starting_half_band", "floor_area_band"],
)

# No partial project score is kept for properties whose register SAP score is above 69
asset_list["partial_project_score"] = np.where(
    (asset_list["epc_sap_score_on_register"] > 69),
    None,
    asset_list["partial_project_score"]
)

# GBIS rows take the partial project score; ECO4 rows take the full project score
asset_list["funding_abs"] = np.where(
    asset_list["funding_scheme"] == "GBIS",
    asset_list["partial_project_score"],
    asset_list["Cost Savings"]
)

# Number of repeated property ids after the merges (printed, matching the solar section;
# as a bare expression the result was discarded)
print(asset_list["domna_property_id"].duplicated().sum())

# Store this data
asset_list.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_abs_estimates.csv",
    index=False
)

View file

@ -1,7 +1,6 @@
import os
import json
import pandas as pd
from pprint import pprint
from asset_list.AssetList import AssetList
from asset_list.mappings.property_type import PROPERTY_MAPPING
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
@ -60,39 +59,370 @@ def app():
Property UPRN
"""
# NCHA
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
data_filename = "Energy Information MASTER June 2025.xlsx"
sheet_name = "Data"
# CDS
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS"
data_filename = "Founder Estates - Asset List.xlsx"
sheet_name = "Combined"
postcode_column = 'Postcode'
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Build Date (HAR10)"
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Property Type (HAR10)"
landlord_built_form = "Build Form (EPC)"
landlord_wall_construction = "Wall Description"
landlord_roof_construction = None
landlord_heating_system = "HEAT Code"
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
landlord_property_id = "Place ref"
landlord_sap = "EPC SAP"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
landlord_property_id = "Row ID"
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_address = []
outcomes_id = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
master_id_colnames = []
landlord_roof_construction = None
phase = False
landlord_sap = None
ecosurv_landlords = None
# Plus Dane
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/"
data_filename = "20250711 Plus Dane Asset List.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Property Age"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = "Built Form"
landlord_wall_construction = "Wall Construction"
landlord_heating_system = "Full Heating System"
landlord_existing_pv = None
landlord_property_id = "UPRN"
outcomes_filename = [
os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"),
os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"),
os.path.join(data_folder, "Outcomes - Plus Dane_PV_2025.xlsx"),
]
outcomes_sheetname = [
"CWI & LI - 2024", "2025 - CWI", "PV - 2025",
]
outcomes_postcode = ["Postcode", "Postcode", "Postcode"]
outcomes_houseno = ["No.", "No", "No"]
outcomes_address = ["Address", "Address", "Address"]
outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"]
master_filepaths = [
os.path.join(data_folder, "submissions/JJC-Table 1.csv"),
os.path.join(data_folder, "submissions/SCIS-Table 1.csv")
]
master_to_asset_list_filepath = None
asset_list_header = 1
landlord_block_reference = None
master_id_colnames = [None, None]
landlord_roof_construction = None
phase = False
landlord_sap = "SAP Rating"
ecosurv_landlords = "plus dane"
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme"
# data_filename = "20250710 Asset List Brentwood.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# fulladdress_column = None
# address1_column = "House Number"
# address1_method = None
# address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"]
# missing_postcodes_method = None
# landlord_year_built = "Year Built"
# landlord_os_uprn = None
# landlord_property_type = "Dwelling"
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_heating_system = "Heating"
# landlord_existing_pv = None
# landlord_property_id = "UPRN"
# outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")]
# outcomes_sheetname = ["OUTCOMES"]
# outcomes_postcode = ["POSTCODE"]
# outcomes_houseno = [None]
# outcomes_address = ["ADDRESS"]
# outcomes_id = [None]
# master_filepaths = [os.path.join(data_folder, "Submissions.csv")]
# master_to_asset_list_filepath = None
# asset_list_header = 1
# landlord_block_reference = None
# master_id_colnames = [None]
# landlord_roof_construction = None
# phase = False
# landlord_sap = None
# ecosurv_landlords = "brentwood"
# Brentwood
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme"
# data_filename = "20250710 Asset List Brentwood.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# fulladdress_column = None
# address1_column = "House Number"
# address1_method = None
# address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"]
# missing_postcodes_method = None
# landlord_year_built = "Year Built"
# landlord_os_uprn = None
# landlord_property_type = "Dwelling"
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_heating_system = "Heating"
# landlord_existing_pv = None
# landlord_property_id = "UPRN"
# outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")]
# outcomes_sheetname = ["OUTCOMES"]
# outcomes_postcode = ["POSTCODE"]
# outcomes_houseno = [None]
# outcomes_address = ["ADDRESS"]
# outcomes_id = [None]
# master_filepaths = [os.path.join(data_folder, "Submissions.csv")]
# master_to_asset_list_filepath = None
# asset_list_header = 1
# landlord_block_reference = None
# master_id_colnames = [None]
# landlord_roof_construction = None
# phase = False
# landlord_sap = None
# ecosurv_landlords = "brentwood"
#
# # Eastlight
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme"
# data_filename = "INSPECTIONS MASTER Non Tech.xlsx"
# sheet_name = "EASTLIGHT CW"
# postcode_column = 'Postcode'
# fulladdress_column = None
# address1_column = "HouseName"
# address1_method = None
# address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"]
# missing_postcodes_method = None
# landlord_year_built = "Built In Year"
# landlord_os_uprn = None
# landlord_property_type = "AssetType"
# landlord_built_form = "Archetype" # Using inspections archetype
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = "Main Heating Source"
# landlord_existing_pv = None
# landlord_property_id = "UPRN"
# landlord_sap = "SAP Score"
# outcomes_filename = [
# os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"),
# os.path.join(data_folder, "Eastlight_CWI_SCIS_2025.xlsx"),
# ]
# outcomes_sheetname = ["Outcomes", "Feedback"]
# outcomes_postcode = ["Postcode", "Postcode"]
# outcomes_houseno = ["No", "No."]
# outcomes_id = [None, None]
# outcomes_address = ["Address", "Address"]
# master_filepaths = [
# os.path.join(data_folder, "ECO 3-Table 1.csv"),
# os.path.join(data_folder, "ECO 4-Table 1.csv"),
# ]
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = "eastlight"
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = [None, None]
# landlord_sap = None
# Pickering and Ferens
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens"
# data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx"
# sheet_name = "Sava Intelligent Energy - Prope"
# postcode_column = 'Postcode'
# fulladdress_column = 'Address'
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = "Property Type" # Using the inspections property type
# landlord_built_form = "Archetype 2"
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "UPRN"
# landlord_sap = "SAP Rating (RdSAP 10)"
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = [
# os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"),
# os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"),
# ]
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = "pickering"
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = [None, None]
# Colchester
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Full Address.1'
# fulladdress_column = "Full Address"
# address1_column = None
# address1_method = "first_word"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Date"
# landlord_os_uprn = None
# landlord_property_type = "Property Type"
# landlord_wall_construction = "Wallinsul"
# landlord_heating_system = "HeatSorc"
# landlord_existing_pv = None
# landlord_property_id = "Property Reference"
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_to_asset_list_filepath = None
# asset_list_header = 0
# landlord_built_form = None
# landlord_roof_construction = None
# landlord_sap = None
# landlord_block_reference = None
# phase = False
# ecosurv_landlords = None
# master_id_colnames = []
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot"
# data_filename = "EalingFlats.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None # Using the inspections property type
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "Property ref"
# landlord_sap = None
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = "Block Ref"
# master_id_colnames = []
# Southern - Jan list
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List"
# data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx"
# sheet_name = "Jan 2025 additions"
# postcode_column = 'Post Code'
# fulladdress_column = None
# address1_column = "NO."
# address1_method = None
# address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"]
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None # Using the inspections property type
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "SH Property Reference"
# landlord_sap = None
# outcomes_filename = [
# os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"),
# os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"),
# ]
# outcomes_sheetname = ["Feedback", "Collated"]
# outcomes_postcode = ["Poscode", "Postcode"]
# outcomes_houseno = ["No.", "No"]
# outcomes_id = ["UPRNs", None]
# outcomes_address = ["Address", "Address"]
# master_filepaths = [
# os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"),
# os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"),
# os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"),
# os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"),
# ]
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = "southern"
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = [None, None, None, None]
# NCHA
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
# data_filename = "Energy Information MASTER June 2025.xlsx"
# sheet_name = "Data"
# postcode_column = 'Postcode'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Date (HAR10)"
# landlord_os_uprn = None
# landlord_property_type = "Property Type (HAR10)"
# landlord_built_form = "Build Form (EPC)"
# landlord_wall_construction = "Wall Description"
# landlord_roof_construction = None
# landlord_heating_system = "HEAT Code"
# landlord_existing_pv = None
# landlord_property_id = "Place ref"
# landlord_sap = "EPC SAP"
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = None
# master_id_colnames = []
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
# data_filename = "07.04 CALICO - Final List.xlsx"
@ -593,6 +923,7 @@ def app():
# We now flag the status of the property
asset_list.label_property_status()
asset_list.analyse_geographies()
asset_list.get_work_figures()
# Store as an excel
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"

View file

@ -17,7 +17,7 @@ class HubspotProcessStatus(IntEnum):
# The property didn't get access and needs sign off
SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"
# The survey has been completed. We don't have any update as to whether the property has been installed
SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"
SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - AUTOMATED SIGNED OFF"
# The property turned out to be ineligible
NOT_VIABLE = 4, "NOT VIABLE"
# The property is with the installer. This will likely be the default for historic programmes
@ -79,7 +79,7 @@ CRM_UPLOAD_COLUMNS = [
'Last EPC: Room Height <LISTING last_epc__room_height>',
'Last EPC: Age Band <LISTING last_epc__age_band>', 'Deal Stage <DEAL dealstage>',
'Pipeline <DEAL pipeline>', 'Expected Commencement Date <DEAL expected_commencement_date>',
'Deal Name <DEAL dealname>', 'Project Code <DEAL project_code>',
'Deal Name <DEAL dealname>', 'Project Code <DEAL project_code>', 'Postcode <DEAL postcode>',
'Product ID <LINE_ITEM hs_product_id>', 'Name <LINE_ITEM name>', 'Unit price <LINE_ITEM price>',
'Quantity <LINE_ITEM quantity>', 'Deal Owner', 'Amount <DEAL amount>', 'Installer <DEAL installer>'
]

View file

@ -2,6 +2,32 @@ import os
import pandas as pd
from asset_list.AssetList import AssetList
import re
def normalize_uk_phone(number: str | float | int) -> str | None:
if pd.isna(number):
return None
number = str(number)
number = re.sub(r"[^\d+]", "", number)
# Handle common short inputs: add '0' if likely missing
if re.match(r"^7\d{8,9}$", number) or re.match(r"^1\d{8,9}$", number):
number = "0" + number
# Convert to international format
if number.startswith("0"):
number = "+44" + number[1:]
elif number.startswith("0044"):
number = "+" + number[2:]
# Must be +44 followed by 10 digits (some area codes may vary)
if re.match(r"^\+44\d{9,10}$", number):
return number
return None
def app():
"""
@ -18,32 +44,28 @@ def app():
"""
# inputs:
reconcile_programme = False # If True, the hubspot upload will include all properties with a project code
customer_domain = "https://sandwell.gov.uk"
installer_name = "J & J CRUMP"
reconcile_programme = True # If True, the hubspot upload will include all properties with a project code
customer_domain = "https://ealing.gov.uk"
installer_name = "SCIS"
asset_list_filepath = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
"Standardised.xlsx"
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/20250707 Ealing Flats - Prepared "
"programme.xlsx"
)
asset_list_sheet_name = "Proposed Program"
asset_list_header = 1
asset_list_sheet_name = "Standardised Asset List"
asset_list_header = 0
contact_details_filepath = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
)
contacts_sheet_name = "Sheet1"
contacts_landlord_property_id = "landlord_property_id"
contact_details_filepath = None
contacts_sheet_name = "Sheet 1"
contacts_landlord_property_id = "UPRN"
contacts_phone_number_column = "phone_number"
contacts_secondary_phone_number_column = "secondary_phone_number"
contacts_secondary_contact_full_name = "secondary_contact_full_name"
contacts_email_column = "email"
contacts_fullname_column = "fullname"
contacts_firstname_column = "firstname"
contacts_lastname_column = "lastname"
contacts_firstname_column = "First Name"
contacts_lastname_column = "Last Name"
existing_programme_filepath = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv"
)
existing_programme_filepath = None
asset_list = AssetList.load_standardised_asset_list(
asset_list_filepath, asset_list_sheet_name, asset_list_header
@ -68,12 +90,12 @@ def app():
)
# Remove the existing programme
existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
asset_list.hubspot_data = asset_list.hubspot_data[
~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
existing_programme['Domna Property ID'].values
)
]
# existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
# asset_list.hubspot_data = asset_list.hubspot_data[
# ~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
# existing_programme['Domna Property ID'].values
# )
# ]
# Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
directory, filename = os.path.split(asset_list_filepath)
@ -89,3 +111,66 @@ def app():
# Just store locally
asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
# # TODO: Set this up separately, but we associate multiple contacts to the same deal
# contact_details = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot "
# "Upload/Hubspot/contact "
# "details.csv"
# )
#
# # contacts_phone_number_column = "phone_number"
# # contacts_secondary_phone_number_column = "secondary_phone_number"
# # contacts_secondary_contact_full_name = "secondary_contact_full_name"
# # contacts_email_column = "email"
# # contacts_fullname_column = "fullname"
# # contacts_firstname_column = "First Name"
# # contacts_lastname_column = "Last Name"
# contact_details["phone_number"] = contact_details["Mobile Phone"].copy()
# # If phone number is NaN, we will use the landline number
# contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"])
# contact_details["secondary_phone_number"] = contact_details["Landline"].copy()
# # If secondary phone number is the same as primary, we remove it
# import numpy as np
# contact_details["secondary_phone_number"] = np.where(
# contact_details["secondary_phone_number"] == contact_details["phone_number"],
# np.nan,
# contact_details["secondary_phone_number"]
# )
# contact_details = contact_details[
# ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number",
# "secondary_phone_number", "First Name", "Last Name"]].copy().rename(
# columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"}
# )
# contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"]
# # Format the phone numbers
#
# contact_details["phone_number"] = contact_details["phone_number"].astype(int).astype(str).apply(
# normalize_uk_phone)
# contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype(
# str).apply(
# normalize_uk_phone)
#
# # Add in the Hubspot deal data
# hubspot_data = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/"
# "property-status.csv",
# encoding="utf-8-sig"
# )
# # Merge on contact details
# contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge(
# contact_details,
# how="left",
# right_on="landlord_proprty_id",
# left_on="Landlord Property ID"
# )
#
# contact_details = contact_details.drop(columns=["landlord_proprty_id"])
#
# # Store as csv
# contact_details.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar "
# "Programme Hubspot Upload/Hubspot/"
# "contact_details.csv",
# index=False, encoding="utf-8-sig"
# )

View file

@ -3,7 +3,7 @@ import numpy as np
STANDARD_BUILT_FORMS = {
"unknown",
# Houses
"end-terrace", "semi-detached", "detached", "mid-terrace",
"end-terrace", "semi-detached", "detached", "mid-terrace", "enclosed mid-terrace", "enclosed end-terrace",
# Flats
"ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise",
}
@ -358,6 +358,19 @@ BUILT_FORM_MAPPINGS = {
'1983- 90 SEMI DET': 'semi-detached',
'1983-90 MID TERR': 'mid-terrace',
'1976-82 SEMI DET': 'semi-detached',
'PRE 1900 MID TERR': 'mid-terrace'
'PRE 1900 MID TERR': 'mid-terrace',
None: 'unknown',
'SEMI-DETACHED': 'semi-detached',
'DETACHED': 'detached',
'MID TERRACE': 'mid-terrace',
'END TERRACE': 'end-terrace',
'ENCLOSED MID': 'enclosed mid-terrace',
'BUILDING': 'unknown',
'FLAT COMMUNAL FACILITIES': 'unknown',
'MAISONETTE': 'unknown',
'HOUSE': 'unknown',
'FLAT': 'unknown',
'BLOCK': 'unknown'
}

View file

@ -28,6 +28,7 @@ STANDARD_HEATING_SYSTEMS = {
"electric underfloor",
"no heating",
"non-electric underfloor",
"warm air heating",
}
HEATING_MAPPINGS = {
@ -326,5 +327,42 @@ HEATING_MAPPINGS = {
'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler',
'GREENSTAR 30SI COMPACT': 'gas combi boiler',
'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler',
'Not applicable for this asset type': 'unknown'
'Not applicable for this asset type': 'unknown',
'Boiler: F rated Regular Boiler': 'gas condensing boiler',
'Warm Air Systems: Electric warm air: Electricaire system': 'warm air heating',
'Boiler: B rated Combi': 'gas condensing combi',
'Boiler: G rated Regular Boiler': 'gas condensing boiler',
'Electric Storage Systems: Modern (slimline) storage heaters': 'electric storage heaters',
'Boiler: C rated CPSU': 'gas condensing combi',
'Boiler: D rated Regular Boiler': 'gas condensing boiler',
'Warm Air Systems: Gas fired warm air with balanced or open flue: Ducted or stub-ducted, on-off control, '
'pre 1998': 'warm air heating',
'Electric Storage Systems: Integrated storage+direct-acting heater': 'electric storage heaters',
'Boiler: D rated Combi': 'gas condensing combi',
'Heat Pump: (from database)': 'air source heat pump',
'Community Heating Systems: Community CHP and boilers (RdSAP)': 'communal heating',
'': 'unknown',
'Solid Fuel Boiler': 'solid fuel',
'Heating (Other)': 'other',
'Solid Fuel Fire Only': 'solid fuel',
'No Main Heat Source': 'no heating',
'Electric Programmable': 'electric storage heaters',
'Linked to Communal Boiler': 'communal heating',
'Bio Mass Boiler': 'solid fuel',
'Electric Non Programmable': 'electric storage heaters',
'Room heaters, Mains gas': 'room heaters',
'Boiler, Solid fuel': 'solid fuel',
'Room heaters, Electricity': 'room heaters',
'Room heaters, Solid fuel': 'room heaters',
'Boiler, Oil': 'oil boiler',
'Boiler, Biomass': 'boiler - other fuel',
'Community heating, Community (non-gas)': 'communal heating',
'Heat pump (wet), Electricity': 'air source heat pump',
'Community heating, Community (mains gas)': 'communal gas boiler',
'Boiler, Electricity': 'electric boiler',
'Boiler, LPG': 'gas boiler, radiators',
'Boiler, Mains gas': 'gas boiler, radiators',
'Storage heating, Electricity': 'electric storage heaters'
}

View file

@ -256,7 +256,6 @@ PROPERTY_MAPPING = {
'HOUSE (3 STOREY)': 'house',
'FLAT GROUND FLOOR': 'flat',
'FLAT TOP FLOOR': 'flat',
'SHARED HOUSE': 'house',
'MAISONETTE': 'maisonette',
'DIRECT ACCESS HOSTEL': 'other',
@ -266,5 +265,11 @@ PROPERTY_MAPPING = {
'SHOP': 'other',
'Office Block': 'other',
'BLOCK (Non-Communal)': 'block of flats',
'Refuge': 'other'
'Refuge': 'other',
None: 'unknown',
'HFOP FLAT': 'flat',
'HFOP BEDSIT': 'bedsit',
'LINKED FLAT': 'flat',
'LINKED BUNGALOW': 'bungalow'
}

View file

@ -9,6 +9,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
"pitched less than 100mm insulation",
"another dwelling above",
"flat unknown insulation",
"flat insulated",
"unknown insulated",
"unknown",
}
@ -51,5 +52,127 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'100MM': 'pitched less than 100mm insulation',
'U/K': 'unknown',
'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation',
'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation'
'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 100mm': 'another dwelling above',
'PitchedNormalNoLoftAccess: 150mm': 'pitched insulated',
'PitchedNormalLoftAccess: As Built, PitchedNormalNoLoftAccess: None': 'pitched insulated',
'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 200mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 200mm': 'pitched insulated',
'PitchedNormalNoLoftAccess: 50mm': 'unknown',
'PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 150mm': 'unknown', 'Flat: None': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation',
'PitchedNormalNoLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation',
'Flat: Unknown, PitchedNormalLoftAccess: 200mm, SameDwellingAbove: Unknown': 'flat unknown insulation',
'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'flat unknown insulation',
'PitchedNormalNoLoftAccess: 250mm': 'pitched insulated',
'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: 100mm': 'pitched insulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 300mm': 'another dwelling above',
'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: 50mm': 'pitched insulated',
'Flat: As Built, PitchedNormalNoLoftAccess: 100mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 150mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 200mm': 'pitched less than 100mm insulation',
'PitchedNormalNoLoftAccess: 75mm': 'pitched less than 100mm insulation',
'Flat: As Built, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 150mm, SameDwellingAbove': 'pitched insulated',
'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 100mm': 'flat unknown insulation',
'Flat: As Built, PitchedNormalNoLoftAccess: None': 'flat unknown insulation',
'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 200mm': 'pitched insulated',
'PitchedNormalNoLoftAccess: 300mm': 'pitched insulated',
'Flat: As Built, PitchedNormalNoLoftAccess: 150mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 150mm, PitchedNormalNoLoftAccess: None': 'pitched insulated',
'PitchedNormalNoLoftAccess: 200mm': 'pitched insulated',
'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: None': 'pitched less than 100mm insulation',
'Flat: As Built': 'flat unknown insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 250mm': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 50mm': 'another dwelling above',
'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched '
'insulated',
'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 250mm': 'pitched insulated',
'Flat: 50mm': 'flat unknown insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: None': 'another dwelling above',
'PitchedNormalNoLoftAccess: None': 'pitched uninsulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 25mm': 'another dwelling above',
'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above',
'Flat: As Built, PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
'Flat: Unknown, PitchedNormalLoftAccess: 75mm, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: Unknown': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 200mm, PitchedNormalLoftAccess: 300mm': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, Flat: As Built, PitchedNormalLoftAccess: 150mm': 'another dwelling above',
'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'Flat: As Built, PitchedNormalLoftAccess: 300mm': 'unknown', 'Flat: 100mm': 'flat unknown insulation',
'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalNoLoftAccess: 100mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 12mm': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 150mm': 'another dwelling above',
'PitchedNormalLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 25mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None, PitchedNormalNoLoftAccess: Unknown': 'pitched '
'insulated',
'PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'Flat: As Built, PitchedNormalNoLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalNoLoftAccess: Unknown, SameDwellingAbove: Unknown': 'pitched no access to loft',
'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: None': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: As Built': 'pitched less than 100mm insulation',
'PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 50mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: None': 'pitched insulated',
'Flat: 100mm, Flat: As Built': 'flat unknown insulation',
'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: No Insulation': 'another dwelling above',
'PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: None': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 300mm': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 270mm': 'pitched insulated',
'PitchedNormalNoLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: 200mm, PitchedNormalNoLoftAccess: Unknown': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 250mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 50mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 300mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated',
'Flat: As Built, PitchedNormalNoLoftAccess: 250mm': 'flat unknown insulation',
'Flat: As Built, PitchedNormalNoLoftAccess: 50mm': 'flat unknown insulation',
'Flat: As Built, PitchedNormalLoftAccess: 75mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 250mm, PitchedNormalNoLoftAccess: No Insulation': 'pitched insulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 150mm': 'another dwelling above',
'PitchedNormalLoftAccess: 75mm, PitchedNormalNoLoftAccess: No Insulation': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: Unknown': 'another dwelling above',
'Flat: As Built, PitchedNormalNoLoftAccess: 200mm': 'flat unknown insulation',
'Flat: As Built, Flat: Unknown, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation',
'Flat: As Built, PitchedNormalLoftAccess: 150mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 300mm, PitchedNormalNoLoftAccess: 100mm': 'pitched insulated',
'PitchedNormalLoftAccess: 100mm, PitchedNormalLoftAccess: 75mm': 'pitched less than 100mm insulation',
'AnotherDwellingAbove: Unknown, PitchedNormalNoLoftAccess: 50mm, PitchedNormalNoLoftAccess: No Insulation':
'another dwelling above',
'Flat: As Built, PitchedNormalLoftAccess: 50mm': 'flat unknown insulation',
'PitchedNormalLoftAccess: 25mm': 'pitched less than 100mm insulation',
'PitchedNormalLoftAccess: 50mm, PitchedNormalNoLoftAccess: Unknown': 'pitched less than 100mm insulation',
'PitchedNormalNoLoftAccess: 150mm, PitchedNormalNoLoftAccess: Unknown, PitchedThatched: 25mm': 'pitched insulated',
'Flat: 150mm+': 'flat insulated',
'Flat: Unknown, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: Unknown': 'pitched insulated',
'PitchedNormalLoftAccess: 150mm, PitchedNormalLoftAccess: 250mm': 'pitched insulated',
'Flat: As Built, PitchedNormalLoftAccess: 100mm, PitchedNormalNoLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalLoftAccess: 250mm': 'pitched insulated',
'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 75mm': 'pitched insulated',
'PitchedNormalLoftAccess: 250mm, PitchedNormalLoftAccess: 50mm': 'pitched insulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 200mm': 'another dwelling above',
'PitchedNormalNoLoftAccess: Unknown': 'pitched no access to loft',
'PitchedNormalLoftAccess: Unknown': 'pitched unknown insulation',
'AnotherDwellingAbove: Unknown': 'another dwelling above'
}

View file

@ -7,122 +7,163 @@ STANDARD_WALL_CONSTRUCTIONS = {
"uninsulated solid brick", "insulated solid brick", "solid brick unknown insulation",
# Timber Frame
"timber frame unknown insulation", "insulated timber frame", "uninsulated timber frame",
"system built", "granite or whinstone", "other",
"unknown", "sandstone or limestone",
# System
"system built unknown insulation", "insulated system built", "uninsulated system built",
# Granite or Whinstone
"granite or whinstone unknown insulation", "insulated granite or whinstone", "uninsulated granite or whinstone",
# Sandstone or Limestone
"sandstone or limestone unknown insulation", "insulated sandstone or limestone",
"uninsulated sandstone or limestone",
# Other
"other",
"cob",
"new build - average thermal transmittance",
}
WALL_CONSTRUCTION_MAPPINGS = {
"New Build - Average Thermal Transmittance": "new build - average thermal transmittance",
'Average thermal transmittance 0.25 W/m?K': 'unknown',
'Average thermal transmittance 0.25 W/m?K': 'new build - average thermal transmittance',
'Cavity wall, as built, insulated (assumed)': 'filled cavity',
'Average thermal transmittance 0.31 W/m?K': 'unknown',
'Cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
'Average thermal transmittance 0.30 W/m?K': 'unknown', 'Average thermal transmittance 0.28 W/m-¦K': 'unknown',
'Average thermal transmittance 0.25 W/m-¦K': 'unknown', 'Average thermal transmittance 0.21 W/m-¦K': 'unknown',
'Average thermal transmittance 0.20 W/m-¦K': 'unknown', 'Average thermal transmittance 0.29 W/m?K': 'unknown',
'Average thermal transmittance 0.16 W/m?K': 'unknown',
'Average thermal transmittance 0.27 W/m&#0178;K': 'unknown',
'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown',
'Average thermal transmittance 0.18 W/m?K': 'unknown',
'Granite or whin, with internal insulation': 'granite or whinstone',
"Granite or whinstone, as built, insulated (assumed)": "granite or whinstone",
'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown',
'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown',
'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown',
'Average thermal transmittance 0.64 W/m?K': 'unknown', 'Average thermal transmittance 0.61 W/m?K': 'unknown',
'Sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
'Average thermal transmittance 0.33 W/m?K': 'unknown',
'Average thermal transmittance 0.30 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.28 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.25 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.21 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.20 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.29 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.16 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.27 W/m&#0178;K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.15 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.23 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.18 W/m?K': 'new build - average thermal transmittance',
'Granite or whin, with internal insulation': 'insulated granite or whinstone',
"Granite or whinstone, as built, insulated (assumed)": "uninsulated granite or whinstone",
'Average thermal transmittance 0.22 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.24 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.16 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.35 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.26 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.62 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.64 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.61 W/m?K': 'new build - average thermal transmittance',
'Sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone',
'Average thermal transmittance 0.33 W/m?K': 'new build - average thermal transmittance',
'Cavity wall,': "cavity unknown insulation",
'Cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
'Average thermal transmittance 0.29 W/m-¦K': 'unknown', 'Average thermal transmittance 0.32 W/m-¦K': 'unknown',
'Average thermal transmittance 0.19 W/m-¦K': 'unknown', 'Average thermal transmittance 0.27 W/m?K': 'unknown',
'Average thermal transmittance 0.22 W/m?K': 'unknown', 'Average thermal transmittance 0.38 W/m?K': 'unknown',
'Average thermal transmittance 0.26 W/m?K': 'unknown', 'Average thermal transmittance 0.27 W/m-¦K': 'unknown',
'Average thermal transmittance 0.18 W/m-¦K': 'unknown', 'Average thermal transmittance = 0.27 W/m?K': 'unknown',
'Cavity wall, with external insulation': 'filled cavity', 'Average thermal transmittance 0.21 W/m?K': 'unknown',
'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown',
'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown',
'Average thermal transmittance 0.29 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.32 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.19 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.27 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.22 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.38 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.26 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.27 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.18 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance = 0.27 W/m?K': 'new build - average thermal transmittance',
'Cavity wall, with external insulation': 'filled cavity',
'Average thermal transmittance 0.21 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.23 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.20 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.32 W/m?K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.24 W/m-¦K': 'new build - average thermal transmittance',
'Cavity wall, with internal insulation': 'filled cavity',
'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown',
'Average thermal transmittance 0.17 W/m-¦K': 'new build - average thermal transmittance',
'Average thermal transmittance 0.28 W/m?K': 'new build - average thermal transmittance',
'new build - average thermal transmittance': 'new build - average thermal transmittance',
'average thermal transmittance 0.25 w/m?k': 'unknown',
'average thermal transmittance 0.25 w/m?k': 'new build - average thermal transmittance',
'cavity wall, as built, insulated (assumed)': 'filled cavity',
'average thermal transmittance 0.31 w/m?k': 'unknown',
'average thermal transmittance 0.31 w/m?k': 'new build - average thermal transmittance',
'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown',
'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown',
'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown',
'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m&#0178;k': 'unknown',
'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown',
'average thermal transmittance 0.18 w/m?k': 'unknown',
'granite or whin, with internal insulation': 'granite or whinstone',
'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown',
'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown',
'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown',
'average thermal transmittance 0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown',
'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': "cavity unknown insulation",
'average thermal transmittance 0.30 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.28 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.25 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.21 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.20 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.29 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.16 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.27 w/m&#0178;k': 'new build - average thermal transmittance',
'average thermal transmittance 0.15 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.23 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.18 w/m?k': 'new build - average thermal transmittance',
'granite or whin, with internal insulation': 'insulated granite or whinstone',
'average thermal transmittance 0.22 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.24 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.16 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.35 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.26 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.62 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.64 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.61 w/m?k': 'new build - average thermal transmittance',
'sandstone or limestone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone',
'average thermal transmittance 0.33 w/m?k': 'new build - average thermal transmittance',
'cavity wall,': "cavity unknown insulation",
'cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown',
'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown',
'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown',
'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown',
'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown',
'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown',
'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown',
'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown',
'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown',
'average thermal transmittance 0.28 w/m?k': 'unknown',
'average thermal transmittance 0.29 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.32 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.19 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.27 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.22 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.38 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.26 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.27 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.18 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance = 0.27 w/m?k': 'new build - average thermal transmittance',
'cavity wall, with external insulation': 'filled cavity',
'average thermal transmittance 0.21 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.23 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.20 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.32 w/m?k': 'new build - average thermal transmittance',
'average thermal transmittance 0.24 w/m-¦k': 'new build - average thermal transmittance',
'cavity wall, with internal insulation': 'filled cavity',
'average thermal transmittance 0.17 w/m-¦k': 'new build - average thermal transmittance',
'average thermal transmittance 0.28 w/m?k': 'new build - average thermal transmittance',
'Cavity wall, filled cavity': 'filled cavity',
'Cavity wall, filled cavity and external insulation': 'filled cavity',
'Granite or whinstone, as built, no insulation (assumed)': 'granite or whinstone',
'Granite or whinstone, as built, no insulation (assumed)': 'uninsulated granite or whinstone',
'Solid brick, as built, insulated (assumed)': 'insulated solid brick',
'Solid brick, as built, no insulation (assumed)': 'uninsulated solid brick',
'Solid brick, with external insulation': 'insulated solid brick',
'Solid brick, with internal insulation': 'insulated solid brick',
'System built, as built, insulated (assumed)': 'system built',
'System built, as built, no insulation (assumed)': 'system built',
'System built, with external insulation': 'system built',
'System built, with internal insulation': 'system built',
'Timber frame, as built, insulated (assumed)': 'timber frame',
'Timber frame, as built, no insulation (assumed)': 'timber frame',
'Timber frame, as built, partial insulation (assumed)': 'timber frame',
'Timber frame, with additional insulation': 'timber frame',
'System built, as built, insulated (assumed)': 'insulated system built',
'System built, as built, no insulation (assumed)': 'uninsulated system built',
'System built, with external insulation': 'insulated system built',
'System built, with internal insulation': 'insulated system built',
'Timber frame, as built, insulated (assumed)': 'insulated timber frame',
'Timber frame, as built, no insulation (assumed)': 'uninsulated timber frame',
'Timber frame, as built, partial insulation (assumed)': 'insulated timber frame',
'Timber frame, with additional insulation': 'insulated timber frame',
'CAVITY': 'cavity unknown insulation',
'COMB': 'unknown',
'NONE': 'unknown',
'NOTKNOWN': 'unknown',
'SOLID': 'solid brick unknown insulation',
np.nan: 'unknown',
'RENDER/TIMBER FRAME': 'timber frame',
'SYSTEM BUILT': 'system built',
'RENDER/TIMBER FRAME': 'timber frame unknown insulation',
'SYSTEM BUILT': 'system built unknown insulation',
'PCC PANELS': 'other',
'NOT APPLICABLE - FLAT': 'unknown',
'BRICK/TIMBER FRAME': 'timber frame',
'BRICK/TIMBER FRAME': 'timber frame unknown insulation',
'BRICK/BLOCK CAVITY': 'cavity unknown insulation',
'STONE SOLID': 'sandstone or limestone',
'EXT CLADDING SYSTEM': 'system built',
'STONE SOLID': 'sandstone or limestone unknown insulation',
'EXT CLADDING SYSTEM': 'system built unknown insulation',
'BRICK/BLOCK SOLID': 'solid brick unknown insulation',
'Cavity Filled cavity (with internal/external)': 'filled cavity',
'ND (inferred) Filled cavity': 'filled cavity',
'Cavity Filled cavity': 'filled cavity',
'Cavity Unknown insulation': 'cavity unknown insulation',
'Timber frame As-built': 'timber frame',
'System build Unknown insulation': 'system built',
'Timber frame As-built': 'uninsulated timber frame',
'System build Unknown insulation': 'system built unknown insulation',
'Cavity As-built': 'uninsulated cavity',
'System build External': 'system built',
'System build External': 'insulated system built',
'ND (inferred) ND (inferred)': 'unknown',
'Solid brick External': 'insulated solid brick',
'Cavity External': 'filled cavity',
'System build As-built': 'system built',
'System build As-built': 'uninsulated system built',
'Solid brick Internal': 'insulated solid brick',
'Cavity Internal': 'filled cavity',
'System build Internal': 'system built',
'Solid brick As-built': 'solid brick unknown insulation',
'System build Internal': 'insulated system built',
'Solid brick As-built': 'uninsulated solid brick',
'Cavity ': 'cavity unknown insulation',
'Solid brick ': 'solid brick unknown insulation',
'Timber frame Timber frame (good insulation)': 'insulated timber frame',
@ -141,91 +182,90 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Cavity: Unknown': 'cavity unknown insulation',
'Cavity: AsBuilt (Post 1995)': 'filled cavity',
'Cavity: AsBuilt (1976-1982)': 'cavity unknown insulation',
'SystemBuilt: AsBuilt': 'system built',
'TimberFrame: AsBuilt': "timber frame unknown insulation",
'Cavity: AsBuilt (1983-1995)': 'cavity unknown insulation',
'SystemBuilt: AsBuilt': 'uninsulated system built',
'TimberFrame: AsBuilt': "uninsulated timber frame",
'Cavity: AsBuilt (1983-1995)': 'filled cavity',
'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity',
'SolidBrick: AsBuilt': 'solid brick unknown insulation',
'SolidBrick: AsBuilt': 'uninsulated solid brick',
'Cavity: FilledCavity': 'filled cavity',
'SolidBrick: Internal': 'insulated solid brick',
'Cavity: External': 'filled cavity',
'Sandstone: Internal': 'sandstone or limestone',
'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation',
'System build': 'system built',
'Sandstone: Internal': 'insulated sandstone or limestone',
'Cavity: AsBuilt (Pre 1976)': 'uninsulated cavity',
'System build': 'system built unknown insulation',
'Solid brick': 'solid brick unknown insulation',
'Stone': 'sandstone or limestone',
'Stone': 'sandstone or limestone unknown insulation',
'Timber frame': 'timber frame unknown insulation',
'2017 onwards': 'new build - average thermal transmittance',
'ND (inferred)': 'unknown',
'Flat / maisonette': 'other',
'Other': 'other',
'Flat / maisonette': 'unknown',
'Other': 'unknown',
'Timber Frame': 'timber frame unknown insulation',
'Cavity Wall': 'cavity unknown insulation',
'Non-Traditional': 'system built',
'PRC': 'system built',
'Cross Wall': 'system built',
'Non-Traditional': 'system built unknown insulation',
'PRC': 'system built unknown insulation',
'Cross Wall': 'system built unknown insulation',
'Solid Wall': 'solid brick unknown insulation',
'Traditional': 'unknown',
'Solid': 'solid brick unknown insulation',
'Wates no fines': 'system built',
'Concrete Frame': 'system built',
'PRCWATES': 'system built',
'Refurbished Cornish': 'system built',
'Wates no fines': 'system built unknown insulation',
'Concrete Frame': 'system built unknown insulation',
'PRCWATES': 'system built unknown insulation',
'Refurbished Cornish': 'system built unknown insulation',
'Bailey Stratton': 'other',
'Refurbished Reema': 'system built',
'PRCREEMA': 'system built',
'Trustsell Type': 'system built',
'Refurbished Reema': 'system built unknown insulation',
'PRCREEMA': 'system built unknown insulation',
'Trustsell Type': 'system built unknown insulation',
'Petra Nissan': 'unknown',
'Reinstated Airey': 'system built',
'Refurbished Airey': 'system built',
'Reinstated Airey': 'system built unknown insulation',
'Refurbished Airey': 'system built unknown insulation',
# From Abri- slightly unclear on types but not a large portion of the data
'No Fines Type': 'system built',
'Refurbished Unity': 'system built',
'No Fines Type': 'system built unknown insulation',
'Refurbished Unity': 'system built unknown insulation',
'Timber Framed': 'timber frame unknown insulation',
'Refurbished Woolaway': 'system built',
'Refurbished Woolaway': 'system built unknown insulation',
'Modern Methods of Construction': 'other',
'BISF - Brit Iron & Steel Federation': 'system built',
'Steel Framed': 'system built',
'BISF - Brit Iron & Steel Federation': 'system built unknown insulation',
'Steel Framed': 'system built unknown insulation',
'Timber Framed with confirmed Fire Stopping': 'timber frame unknown insulation',
'Sipporex': 'system built',
'Sipporex': 'system built unknown insulation',
'Wates': 'system built',
'Bryants': 'system built',
'Gregory (Crosswall)': 'system built',
'Rsmit': 'system built',
'Dorman Long': 'system built',
'Tarmac': 'system built',
'RBIS': 'system built',
'Five Oaks': 'system built',
'Wates': 'system built unknown insulation',
'Bryants': 'system built unknown insulation',
'Gregory (Crosswall)': 'system built unknown insulation',
'Rsmit': 'system built unknown insulation',
'Dorman Long': 'system built unknown insulation',
'Tarmac': 'system built unknown insulation',
'RBIS': 'system built unknown insulation',
'Five Oaks': 'system built unknown insulation',
'Not known': 'unknown',
'Smiths': 'system built',
'Kendrick': 'system built',
'IDC': 'system built',
'Wimpey (Part Brick)': 'system built',
'Whitehall': 'system built',
'Wimpey': 'system built',
'Bison': 'system built',
'Zinns': 'system built',
'Bisf': 'system built',
'Integer': 'system built',
'Cornish': 'system built',
'Rwate': 'system built',
'Hill Presweld Steel': 'system built',
'Smiths': 'system built unknown insulation',
'Kendrick': 'system built unknown insulation',
'IDC': 'system built unknown insulation',
'Wimpey (Part Brick)': 'system built unknown insulation',
'Whitehall': 'system built unknown insulation',
'Wimpey': 'system built unknown insulation',
'Bison': 'system built unknown insulation',
'Zinns': 'system built unknown insulation',
'Bisf': 'system built unknown insulation',
'Integer': 'system built unknown insulation',
'Cornish': 'system built unknown insulation',
'Rwate': 'system built unknown insulation',
'Hill Presweld Steel': 'system built unknown insulation',
'Cavity Filled Cavity': 'filled cavity',
'Cavity Unknown': 'cavity unknown insulation',
'Cavity Filled Cavity (internal)': 'filled cavity',
'': 'unknown',
'Cavity Internal Insulation': 'filled cavity',
'Cavity As Built': "uninsulated cavity",
'Non Trad Large Panel System': 'system built',
'Non Trad Cornish': 'system built',
'Non Trad Reema': 'system built',
'Non Trad Large Panel System': 'system built unknown insulation',
'Non Trad Cornish': 'system built unknown insulation',
'Non Trad Reema': 'system built unknown insulation',
'Traditional Cavity Brickwork': 'cavity unknown insulation',
'System build (undefined)': 'system built',
'Non Trad Wimpey': 'system built',
'Non Trad Wates': 'system built',
'System build (undefined)': 'system built unknown insulation',
'Non Trad Wimpey': 'system built unknown insulation',
'Non Trad Wates': 'system built unknown insulation',
'CAVITY FILLED 270MM': 'filled cavity',
'CAVITY FILLED 270MM': 'filled cavity',
'CAVITY FILLED 250MM': 'filled cavity',
@ -238,17 +278,60 @@ WALL_CONSTRUCTION_MAPPINGS = {
'CAVITY A/B 270MM': "uninsulated cavity",
'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation',
'CAVITY EWI': 'filled cavity',
'SANDSTONE/CAVITY EXT': 'sandstone or limestone',
'SYSTEM BUILD 100MM EWI': 'system built',
'SANDSTONE/CAVITY EXT': 'sandstone or limestone unknown insulation',
'SYSTEM BUILD 100MM EWI': 'insulated system built',
'CAVITY A/B 260MM': "uninsulated cavity",
'CAVITY A/B 270MM': "uninsulated cavity",
'CAVITY A/B 250MM': "uninsulated cavity",
'System': 'system built',
'Sandstone/Limestone': 'sandstone or limestone',
'No Fines': 'system built',
'Granite/Whinstone': 'granite or whinstone',
'System': 'system built unknown insulation',
'Sandstone/Limestone': 'sandstone or limestone unknown insulation',
'No Fines': 'system built unknown insulation',
'Granite/Whinstone': 'granite or whinstone unknown insulation',
'Not applicable to this asset type': 'unknown',
'Steel Frame': 'system built',
'Steel Frame': 'system built unknown insulation',
'Solid Wall As Built': 'uninsulated solid brick',
'Solid As Built': 'uninsulated solid brick'
'Solid As Built': 'uninsulated solid brick',
'Cavity: FilledCavity, Cavity: Unknown': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), TimberFrame: Unknown': 'uninsulated cavity',
'SolidBrick: AsBuilt, SolidBrick: Unknown': 'uninsulated solid brick',
'Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), SolidBrick: Unknown': 'uninsulated cavity',
'Cavity: FilledCavity, TimberFrame: Unknown': 'filled cavity',
'Cavity: AsBuilt (1976-1982), Cavity: Unknown': 'uninsulated cavity',
'Cavity: Unknown, SolidBrick: AsBuilt': 'cavity unknown insulation',
'Cavity: AsBuilt (1976-1982), Cavity: FilledCavity': 'filled cavity',
'Cavity: External, Cavity: FilledCavity': 'filled cavity',
'Cavity: AsBuilt (Post 1995), TimberFrame: AsBuilt': 'filled cavity',
'TimberFrame: AsBuilt, TimberFrame: Internal': 'timber frame unknown insulation',
'GraniteOrWhinstone: AsBuilt': 'uninsulated granite or whinstone',
'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), Cavity: FilledCavity': 'filled cavity',
'SolidBrick: AsBuilt, SolidBrick: External': 'insulated solid brick',
'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity': 'filled cavity',
'Cavity: FilledCavity, SolidBrick: Internal': 'filled cavity',
'Cavity: AsBuilt (Post 1995), Cavity: FilledCavity, SolidBrick: Unknown': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), SolidBrick: AsBuilt': 'uninsulated cavity',
'Cavity: AsBuilt (1976-1982), SolidBrick: AsBuilt': 'filled cavity',
'Cavity: FilledCavity, SolidBrick: AsBuilt': 'filled cavity',
'SolidBrick: External': 'insulated solid brick',
'Cavity: FilledCavity, Cavity: Internal': 'filled cavity',
'Cavity: External, SolidBrick: AsBuilt': 'filled cavity',
'SolidBrick: AsBuilt, TimberFrame: AsBuilt': 'uninsulated solid brick',
'Cavity: FilledCavity, SystemBuilt: AsBuilt': 'filled cavity',
'Cavity: AsBuilt (1976-1982), SystemBuilt: AsBuilt': 'system built',
'Cavity: AsBuilt (Post 1995), SolidBrick: AsBuilt': 'filled cavity',
'Cavity: AsBuilt (1983-1995), TimberFrame: AsBuilt': 'filled cavity',
'SystemBuilt: AsBuilt, TimberFrame: AsBuilt': 'uninsulated system built',
'TimberFrame: Internal': 'insulated timber frame',
'Cavity: Internal': 'filled cavity',
'SystemBuilt: External': 'filled cavity',
'Cavity: AsBuilt (Pre 1976), SystemBuilt: AsBuilt': 'uninsulated cavity',
'SystemBuilt: Internal': 'insulated system built',
'Cavity: AsBuilt (1983-1995), SolidBrick: AsBuilt': 'solid brick unknown insulation',
'Cavity: AsBuilt (Pre 1976), TimberFrame: AsBuilt': 'timber frame unknown insulation',
'SolidBrick: AsBuilt, SolidBrick: Internal': 'uninsulated solid brick',
'Cavity: FilledCavity, TimberFrame: AsBuilt': 'filled cavity',
'Cavity: FilledCavity, SolidBrick: AsBuilt, SolidBrick: Internal': 'filled cavity',
'Cavity: Internal, SolidBrick: AsBuilt': 'filled cavity',
}

View file

@ -6,7 +6,10 @@ epc-api-python==1.0.2
thefuzz
boto3
openpyxl
openai
openai>=1.3.5
tiktoken
msgpack
beautifulsoup4
beautifulsoup4
pydantic>=1.10.7
typing-extensions>=4.5.0
requests>=2.28.2

View file

@ -1,5 +1,5 @@
import time
import numpy as np
import random
import pandas as pd
from backend.SearchEpc import SearchEpc
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
@ -37,7 +37,9 @@ def get_data(
"mid-terrace": "Mid-Terrace",
"end-terrace": "End-Terrace",
"semi-detached": "Semi-Detached",
"detached": "Detached"
"detached": "Detached",
"enclosed end-terrace": "End-Terrace",
"enclosed mid-terrace": "Mid-Terrace",
}
epc_data = []
@ -101,7 +103,6 @@ def get_data(
else:
# Try splitting on space
add1 = full_address.split(" ")[0].strip()
else:
add1 = str(house_number)
searcher = SearchEpc(
@ -172,7 +173,7 @@ def get_data(
find_epc_data = {}
except Exception as e:
raise Exception(f"Error retrieving FindMyEPC data: {e}")
time.sleep(np.random.uniform(0.1, 1))
time.sleep(random.sample(range(50, 100), 1)[0] / 100)
epc = {
row_id_name: home[row_id_name],
@ -182,6 +183,11 @@ def get_data(
}
epc_data.append(epc)
if len(epc_data) % 50 == 0 and len(epc_data) > 0:
logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
time.sleep(10)
except Exception as e:
errors.append(home[row_id_name])
time.sleep(5)

View file

@ -217,6 +217,9 @@ class Property:
self.eco4_eligibility = None
self.whlg_eligibility = None
# Ventilation
self.has_ventilation = self.identify_ventilation()
@classmethod
def extract_kwargs(cls, kwargs):
"""
@ -1197,7 +1200,7 @@ class Property:
self.heating_energy_source = self.heating_energy_source[0]
if self.heating_energy_source == "Varied (Community Scheme)":
if self.main_fuel["fuel_type"] == "mains gas":
if self.main_fuel["fuel_type"] in ["mains gas", None]: # We assume when None as it's unknown
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
raise Exception("Implement me")
@ -1233,6 +1236,13 @@ class Property:
if "air_source_heat_pump" not in measures:
return False
# If we have a house over a floor area threshold, we recommend an ASHP
if (
self.data["property-type"] in ["House", "Bungalow"] and
self.floor_area > assumptions.ASHP_FLOOR_AREA_THRESHOLD
):
return True
suitable_house = self.data["property-type"] == "House" and self.data["built-form"] in [
"Detached", "Semi-Detached", "End-Terrace",
]
@ -1342,3 +1352,12 @@ class Property:
self.gbis_eligibiltiy = funding_calulator.gbis_eligibiltiy
self.eco4_eligibility = funding_calulator.eco4_eligibility
self.whlg_eligibility = funding_calulator.whlg_eligibility
def identify_ventilation(self):
    """
    Report whether the property data describes a mechanical ventilation system.

    :return: True when the "mechanical-ventilation" entry of self.data is one of the recognised
        mechanical ventilation descriptions; False for any other value, or when the entry is absent.
    """
    ventilation_descriptions = (
        'mechanical, extract only',
        'mechanical, supply and extract',
    )
    # .get rather than [...]: a record without this field is treated as having no mechanical
    # ventilation instead of raising KeyError
    return self.data.get("mechanical-ventilation") in ventilation_descriptions

View file

@ -58,6 +58,19 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
"Room heaters, wood logs": {"fuel": "Wood Logs", "cop": 1},
"Boiler and radiators, coal": {"fuel": "Coal", "cop": 0.85},
"From main system, no cylinderstat": {"fuel": "Natural Gas", "cop": 0.85},
"Room heaters, coal": {"fuel": "Coal", "cop": 0.85},
"Electric underfloor heating, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
'Room heaters, electric, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85},
'Boiler and radiators, mains gas, Boiler and radiators, mains gas': {"fuel": "Natural Gas", "cop": 0.85},
'Room heaters, electric, Electric storage heaters': {"fuel": "Electricity", "cop": 1},
"Boiler and radiators, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85},
"Boiler and radiators, anthracite": {"fuel": "Anthracite", "cop": 0.85},
'Electric immersion, off-peak, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1},
'Ground source heat pump, radiators, electric': {
"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
},
'Electric instantaneous at point of use, plus solar': {"fuel": "Electricity + Solar Thermal", "cop": 1},
"Electric storage heaters, Room heaters, electric": {"fuel": "Electricity", "cop": 1},
}
# These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
@ -65,3 +78,6 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
measures_needing_ventilation = [
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]
# If we have a property beyond this size, we assume it's likely large enough to have an ASHP
ASHP_FLOOR_AREA_THRESHOLD = 120 # m2

View file

@ -96,3 +96,7 @@ class PlanTriggerRequest(BaseModel):
# When performing a remote assessment, if this has been set, it will allow the engine to
# pull data from the find my epc website, to utilise as part of a remote assessment
event_type: Optional[Literal["remote_assessment"]] = None
# If true, before optimising the engine will select a slightly larger package, to account for the SAP 10 causing
# scores to drop by a few points
simulate_sap_10: Optional[bool] = False

View file

@ -30,7 +30,6 @@ import backend.app.assumptions as assumptions
from backend.ml_models.api import ModelApi
from backend.Property import Property
from backend.Funding import Funding
from backend.apis.GoogleSolarApi import GoogleSolarApi
from recommendations.optimiser.CostOptimiser import CostOptimiser
@ -507,7 +506,7 @@ async def model_engine(body: PlanTriggerRequest):
)
# If this is a remote assessment event, we pull the additional data and include it
if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]):
if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")):
logger.info("Retrieving find my epc data")
try:
property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc(
@ -728,7 +727,8 @@ async def model_engine(body: PlanTriggerRequest):
# Additionally, if we have required measures, they should also be included. Therefore
# we can discount the number of points required to get to the target SAP band (or increase)
# in the case of ventilation
needs_ventilation = any(x in property_measure_types for x in assumptions.measures_needing_ventilation)
needs_ventilation = any(
x in property_measure_types for x in assumptions.measures_needing_ventilation) and not p.has_ventilation
input_measures = prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation)
@ -772,6 +772,10 @@ async def model_engine(body: PlanTriggerRequest):
epc_to_sap_lower_bound(body.goal_value) - current_sap_points
) - fixed_gain
if body.simulate_sap_10:
# We add 3 additional SAP points to the required gain to account for SAP 10
sap_gain += 3
if not body.optimise:
if body.goal != "Increasing EPC":
raise NotImplementedError("Only EPC optimisation is currently supported")
@ -826,7 +830,11 @@ async def model_engine(body: PlanTriggerRequest):
)
# If wall insulation is selected, we also include mechanical ventilation as a best practice measure
if any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation):
ventilation_selected = [
r for r in solution if "+mechanical_ventilation" in r["type"]
]
if (any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation) or
len(ventilation_selected)):
ventilation_rec = next(
(r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
None

View file

@ -28,8 +28,8 @@ class AnnualBillSavings:
# Latest price cap figures from Ofgem are for April 2024
# https://www.ofgem.gov.uk/energy-price-cap
ELECTRICITY_PRICE_CAP = 0.2486
GAS_PRICE_CAP = 0.0634
ELECTRICITY_PRICE_CAP = 0.2573
GAS_PRICE_CAP = 0.0633
# This is the most recent export payment figure, at 9.28p/kWh
# Smart export guarantee rates can be found here:
# https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates
@ -39,8 +39,8 @@ class AnnualBillSavings:
PRICE_FACTOR = 0.09549999999999999
# Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
DAILY_STANDARD_CHARGE_GAS = 0.3165
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.6097
DAILY_STANDARD_CHARGE_GAS = 0.2982
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5137
# Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison
# For July 2024. These quotes are based on the east midlands region, so we

View file

@ -0,0 +1,38 @@
"""
Brentwood supplied a fresh asset list in July 2025. This script joins the columns we still need from the
previous asset list onto the new one, so that we hold a single combined picture
"""
import pandas as pd

PROGRAMME_DIR = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme/"
)

# Columns carried across from the previous asset list. UPRN is the join key
LEGACY_COLUMNS = [
    "UPRN", "Asset Type", "Year Built", "Dwelling", "Bedrooms", "Ownership", 'Asbestos Full Survey',
    'Stock Condition Survey', 'Cat', 'Heating',
    'WFT Findings', 'ECO Eligibility', 'CIGA Requested', 'CIGA Guarantee',
    'ECO Survey completed',
]

# The new list is read with header=1, i.e. the column names are taken from the second row of the sheet
new_asset_list = pd.read_excel(
    PROGRAMME_DIR + "All Assets 29.05.2025.xlsx",
    sheet_name="Sheet1",
    header=1,
)
old_asset_list = pd.read_excel(
    PROGRAMME_DIR + "BRENTWOOD Asset list.xlsx",
    sheet_name="Asset List",
)

# Left join: every row of the new list is kept, whether or not the old list knows the UPRN
legacy_data = old_asset_list[LEGACY_COLUMNS]
compiled = new_asset_list.merge(legacy_data, how="left", on="UPRN")

# Rows without a finding (including UPRNs absent from the old list) are labelled as not inspected
compiled["WFT Findings"] = compiled["WFT Findings"].fillna("Not Inspected")

# Store the combined list
compiled.to_excel(PROGRAMME_DIR + "20250710 Asset List Brentwood.xlsx", index=False)

View file

@ -0,0 +1,54 @@
"""
Attach Colchester's programme comments to the July 2025 cavity and solar routes, and list the
commented properties that appear on neither route
"""
import pandas as pd

PROJECT_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/July Finalised Project/"
STANDARDISED_WORKBOOK = PROJECT_DIR + "20250708 Colchester Borough Homes- Standardised.xlsx"

comments_df = pd.read_excel(PROJECT_DIR + "CBH_RetroTeamList_amended_25-06-05.xlsx")
cavity_route = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="July 2025 Route - Cavity")
solar_route = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="July 2025 Route - Solar")

# Only these comment columns are merged onto the routes ("URPN" is the spelling used by the source sheet)
comment_columns = ["URPN", 'Unnamed: 6', 'SHDF Live', 'SHDF Removed', 'SHDF Reserve', '25-26 List (138 to EPC)']
comments = comments_df[comment_columns].copy()

# Both routes are joined to the comments in exactly the same way
merge_options = {"left_on": "landlord_property_id", "right_on": "URPN", "how": "left"}
cavity_route = cavity_route.merge(comments, **merge_options)
solar_route = solar_route.merge(comments, **merge_options)

# Properties in the comments sheet that are on neither route
on_cavity_route = comments_df["URPN"].isin(cavity_route["landlord_property_id"])
on_solar_route = comments_df["URPN"].isin(solar_route["landlord_property_id"])
not_on_routes = comments_df[~(on_cavity_route | on_solar_route)]

# Store the unmatched properties first, then the two routes
outputs = [
    (not_on_routes, "Properties not on routes.xlsx"),
    (cavity_route, "Cavity Route.xlsx"),
    (solar_route, "Solar Route.xlsx"),
]
for frame, output_name in outputs:
    frame.to_excel(PROJECT_DIR + output_name, index=False)

View file

@ -0,0 +1,144 @@
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
from backend.Funding import Funding
from backend.app.utils import sap_to_epc
from recommendations.recommendation_utils import estimate_external_wall_area

load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

DOWNLOADS_DIR = "/Users/khalimconn-kowlessar/Downloads/"
ACIS_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/"

# Recommendation types treated as solid wall insulation
SOLID_WALL_MEASURES = ("internal_wall_insulation", "external_wall_insulation")
# Key columns of the full project scores matrix, dropped again once the scores are merged on
ABS_KEY_COLUMNS = ['Starting Band', 'Finishing Band', 'Floor Area Segment']

abs_matrix = pd.read_csv(DOWNLOADS_DIR + "ECO4 Full Project Scores Matrix.csv")
pps_matrix = pd.read_excel(DOWNLOADS_DIR + "ECO4 Partial Project Scores Matrix v5.xlsx", header=1)
# The partial project matrix headers carry stray whitespace
pps_matrix.columns = [column_name.strip() for column_name in pps_matrix.columns]

asset_list = pd.read_excel(
    ACIS_DIR + "Solid Wall Properties - Standardised_2.xlsx",
    sheet_name="Standardised Asset List",
)
asset_list = asset_list.rename(columns={"domna_address_1": "address", "domna_postcode": "postcode"})
asset_list["address"] = asset_list["address"].astype(str)

# Pull the find my EPC data and get the SAP points for solid wall
asset_list_epc_client = AssetListEpcData(asset_list=asset_list, epc_auth_token=EPC_AUTH_TOKEN)
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()

# For each property keep the first solid wall recommendation; properties without one are left out
solid_wall_rows = []
for property_result in asset_list_epc_client.non_invasive_recommendations:
    matches = [
        rec for rec in property_result["recommendations"]
        if any(measure in rec["type"] for measure in SOLID_WALL_MEASURES)
    ]
    if not matches:
        continue
    solid_wall_rows.append(
        {
            "address": property_result["address"],
            "postcode": property_result["postcode"],
            "sap_points": matches[0]["sap_points"],
        }
    )
solid_wall_sap_points = pd.DataFrame(solid_wall_rows)

# Properties with no solid wall recommendation are given the median points of those that have one
avg_points = solid_wall_sap_points["sap_points"].median()
asset_list = asset_list.merge(solid_wall_sap_points, how="left", on=["address", "postcode"])
asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_points)

# Post-works position and the bands needed to look up the funding matrices
asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(sap_to_epc)
asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(Funding.get_sap_band)
asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(Funding.get_sap_band)
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(Funding.get_floor_area_band)

# If the works do not move the EPC rating we label the property GBIS, otherwise ECO4
rating_unchanged = asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]
asset_list["funding_scheme"] = np.where(rating_unchanged, "GBIS", "ECO4")

# Merge on the ABS matrix
asset_list = asset_list.merge(
    abs_matrix,
    how="left",
    left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
    right_on=ABS_KEY_COLUMNS,
)
asset_list = asset_list.drop(columns=ABS_KEY_COLUMNS)

# store for backup
# asset_list.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/Solid Wall Properties -
# Standardised_2_with_funding.csv",
# index=False
# )

# For GBIS, we use the PPS
# Almost all properties are gas
# Using IWI solid 1.7 -> 0.3 rates
pps_matrix = pps_matrix[pps_matrix["Measure_Type"].isin(["IWI_solid_1.7_0.3"])]
partial_scores = pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename(
    columns={
        "Cost Savings": "partial_project_score",
        "Starting Band": "starting_half_band",
        "Total Floor Area Band": "floor_area_band",
    }
)
asset_list = asset_list.merge(partial_scores, how="left", on=["starting_half_band", "floor_area_band"])

# Properties starting in band C get no partial project score
starts_in_band_c = asset_list["starting_half_band"].isin(["Low_C", "High_C"])
asset_list["partial_project_score"] = np.where(starts_in_band_c, None, asset_list["partial_project_score"])

# GBIS rows take the partial project score, everything else the full project cost savings
is_gbis = asset_list["funding_scheme"] == "GBIS"
asset_list["funding_abs"] = np.where(is_gbis, asset_list["partial_project_score"], asset_list["Cost Savings"])


def _heat_loss_area(row):
    """Estimate the external wall area for one asset list row, assuming a 2.5 floor height when none is recorded"""
    recorded_height = row["epc_floor_height"]
    floor_height = 2.5 if pd.isnull(recorded_height) else float(recorded_height)
    return estimate_external_wall_area(
        num_floors=row["attribute_est_number_floors"],
        floor_height=floor_height,
        perimeter=row["attribute_est_perimter"],
        built_form=row["epc_archetype"],
    )


asset_list["heat_loss_area"] = asset_list.apply(_heat_loss_area, axis=1)

filename = ACIS_DIR + "20250624 ACIS solid wall - standardised.xlsx"
with pd.ExcelWriter(filename) as writer:
    asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)

View file

@ -0,0 +1,49 @@
# Get units for postcodes WF17 8RA, WF17 8RB
# Pulls every domestic EPC for the two postcodes, keeps the newest certificate per UPRN, stores it, and then
# runs a few summary statistics
import os
import pandas as pd
from epc_api.client import EpcClient
from dotenv import load_dotenv

load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

postcodes = [
    "WF17 8RA",
    "WF17 8RB",
]

client = EpcClient(auth_token=EPC_AUTH_TOKEN)

data = []
for postcode in postcodes:
    # Only the postcode filter is populated; every other search parameter is passed as None
    resp = client.domestic.search(
        params={"postcode": postcode, "address": None, "local-authority": None, "property-type": None,
                "floor-area": None,
                "energy-band": None, "from-month": None, "from-year": None, "to-month": None, "to-year": None,
                'constituency': None},
        size=1000
    )
    data.extend(resp["rows"])

df = pd.DataFrame(data)

# Get newest field by UPRN, inspection-date
df["inspection-date"] = pd.to_datetime(df["inspection-date"])
df = df.sort_values(by=["uprn", "inspection-date"], ascending=[True, False])
df = df.drop_duplicates(subset=["uprn"], keep="first")

df.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Blakeridge Mill/blakeridge_mill_epc_data.xlsx", index=False
)

# Take an explicit copy of the filtered frame: a column is assigned on it below, and assigning on a
# filtered slice triggers pandas' chained-assignment warning
df = df[df["address"] != "The Tower Blakeridge Mill, Upper Blakeridge Lane"].copy()

# The expressions below are summary statistics; their results are not stored or printed by this script
df["walls-description"].value_counts()
df["roof-description"].value_counts()
df["total-floor-area"].astype(float).mean()
df["current-energy-efficiency"] = pd.to_numeric(df["current-energy-efficiency"], errors='coerce')
df.groupby("transaction-type")["current-energy-efficiency"].mean()
df["transaction-type"].value_counts()
df[df["transaction-type"] == "rental"]["built-form"].value_counts()

View file

@ -0,0 +1,289 @@
import pandas as pd
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
import numpy as np

contact_list = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
    "PV address list - second wave KLD - PP.csv"
)
contact_list["house_no"] = contact_list.apply(lambda x: SearchEpc.get_house_number(
    address=str(x["Address 1: Street 1"]).strip(),
    postcode=str(x["Postal Code"]).strip(),
), axis=1)

asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/asset_list - "
    "Standardised (1).xlsx",
    sheet_name="Standardised Asset List"
)

# Hand-matched addresses, checked before any automatic matching is tried. Keys are the raw street values
# exactly as they appear in the contact list (including trailing spaces); values are landlord_property_id
MANUAL_MATCHES = {
    '1 The Beck': 40692,
    '3 The Beck ': 40693,
    '2 Orchard Close ': 7924,
    '3 Croxall Road': 40650,
    '4 Ward Road ': 33175,
}

# These do not depend on the contact being matched, so they are computed once rather than per row
asset_addresses = asset_list["domna_full_address"].str.replace(",", "", regex=False)
asset_postcodes = asset_list["domna_postcode"]
asset_house_numbers = asset_list["domna_address_1"].astype(str)

lookup = []
missed = []
for _, x in tqdm(contact_list.iterrows(), total=len(contact_list)):
    raw_street = x["Address 1: Street 1"]
    if raw_street in MANUAL_MATCHES:
        lookup.append(
            {
                "UPRN": x["UPRN"],
                "landlord_property_id": MANUAL_MATCHES[raw_street],
            }
        )
        continue

    street = raw_street.strip()
    # regex=False: addresses and postcodes are matched as literal text, not as patterns
    # na=False: asset rows with a missing address/postcode count as "no match" instead of yielding a NaN mask
    postcode_matches = asset_postcodes.str.contains(x["Postal Code"].strip(), regex=False, na=False)

    # 1) The asset address contains the contact's street, within the same postcode
    df = asset_list[asset_addresses.str.contains(street, regex=False, na=False) & postcode_matches]

    if df.shape[0] != 1:
        # 2) The asset address equals the contact's street. The comparison is parenthesised because `&`
        # binds more tightly than `==`
        df = asset_list[(asset_addresses == street) & postcode_matches]

    if df.shape[0] != 1:
        # 3) Fall back to the house number within the postcode
        df = asset_list[(asset_house_numbers == str(x["house_no"])) & postcode_matches]

    if df.shape[0] != 1:
        # Still ambiguous or unmatched - leave for manual completion
        missed.append(x["UPRN"])
        continue

    lookup.append(
        {
            "UPRN": x["UPRN"],
            "landlord_property_id": df["landlord_property_id"].values[0],
        }
    )

lookup = pd.DataFrame(lookup)
contact_list = contact_list.merge(lookup, how="left", on="UPRN")

# Store
contact_list.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
    "PV address list - second wave KLD - PP with landlord_property_id.csv",
    index=False
)
# I manually completed the lookup for the missed ones. We now read it back in and pull in the properties for the
# standardised asset list
# NOTE(review): contacts_complete is loaded here but nothing below reads it - the merges carry on from
# contact_list. Confirm which frame was intended
contacts_complete = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Bromford - Solar "
    "PV address list - second wave KLD - PP with landlord_property_id.csv"
)

new_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Master Sheet "
    "Solar PV installs.xlsx",
    sheet_name="Sheet1"
)

contact_list = contact_list.merge(
    new_data,
    how="left",
    left_on="UPRN",
    right_on="CE UPRN"
)

# Both join keys are normalised to strings so the isin checks and the merge below always compare like with
# like. Previously the final check compared string Legacy UPRNs against integers, which can never match
contact_list["Legacy UPRN"] = contact_list["Legacy UPRN"].astype("Int64").astype(str)
asset_property_ids = asset_list["landlord_property_id"].astype(str)

route = asset_list[asset_property_ids.isin(contact_list["Legacy UPRN"])].copy()
route["landlord_property_id"] = route["landlord_property_id"].astype(str)

# Add the new heating data
route2 = contact_list.merge(
    route,
    how="left",
    right_on="landlord_property_id",
    left_on="Legacy UPRN"
)

# Because I did a data pull, we can fill the other bits of information
# Contacts whose Legacy UPRN has no counterpart on the route
missed = contact_list[~contact_list["Legacy UPRN"].isin(route["landlord_property_id"])]

# Store the route
# NOTE(review): only route2 is written out; `missed` is kept in memory and not stored
route2.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/route.csv",
    index=False
)
# Add on phone number: gather the contact rows, from the three contact detail exports, for every UPRN in the
# prepared hubspot sheet
HUBSPOT_UPLOAD_DIR = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/"
)
PROPERTY_REF = "Property Reference Number (Main Address) (Property)"
CONTACT_FIELDS = ["Landline", "Mobile Phone", "Email Address"]

contact_details_filepath = (
    HUBSPOT_UPLOAD_DIR
    + "Hubspot/Bromford - Solar PV address list - second wave KLD - PP with landlord_property_id.xlsx"
)
contacts_filenames = [
    HUBSPOT_UPLOAD_DIR + "contact details/FAO Paul Contact Details-Table 1.csv",
    HUBSPOT_UPLOAD_DIR + "contact details/Green Contact Details-Table 1.csv",
    HUBSPOT_UPLOAD_DIR + "contact details/Main Contact Details-Table 1.csv",
]

merge_to = pd.read_excel(contact_details_filepath)
wanted_uprns = merge_to["UPRN"].astype(str)


def _matching_contacts(csv_path):
    """Read one contact export and keep the contact columns for the properties present in merge_to"""
    export = pd.read_csv(csv_path, encoding="utf-8-sig")
    keep_columns = [PROPERTY_REF] + CONTACT_FIELDS + ["First Name", "Last Name"]
    return export.loc[export[PROPERTY_REF].isin(wanted_uprns), keep_columns]


lookup = pd.concat([_matching_contacts(csv_path) for csv_path in contacts_filenames])

# Drop entries where landline, mobile and email are all NaN, then entries repeating the same three values
lookup = lookup.dropna(subset=CONTACT_FIELDS, how="all")
lookup = lookup.drop_duplicates(CONTACT_FIELDS)

# Order by property, then email, then landline, then mobile (all ascending)
lookup = lookup.sort_values([PROPERTY_REF, "Email Address", "Landline", "Mobile Phone"])

# Store
lookup.to_csv(HUBSPOT_UPLOAD_DIR + "Hubspot/contact details.csv", index=False)
PROPERTY_REF_COLUMN = "Property Reference Number (Main Address) (Property)"


def _preferred_phone(contact):
    """Return the contact's mobile number when present, otherwise their landline value"""
    mobile = contact["Mobile Phone"]
    # NaN is truthy, so `mobile or landline` would return a missing mobile; test for presence explicitly
    return mobile if pd.notna(mobile) else contact["Landline"]


# First pass: a single contact per property
lookup2 = []
for _, x in lookup.groupby(PROPERTY_REF_COLUMN):
    # If any entries have an email, we take those
    if x["Email Address"].notna().any():
        x = x[x["Email Address"].notna()]
    # We then take the entries with a phone number
    if x["Landline"].notna().any() or x["Mobile Phone"].notna().any():
        x = x[x["Landline"].notna() | x["Mobile Phone"].notna()]
    # Take the first entry
    x = x.iloc[0]
    lookup2.append(x)
lookup2 = pd.DataFrame(lookup2)

# Second pass: a primary and (where available) a secondary contact per property.
# This runs on the real `lookup` frame; the pasted sample code that replaced `lookup` with an empty frame,
# and the `ace_tools` display call (a module not used anywhere else in this script), have been removed
results = []
for prop_id, group in lookup.groupby(PROPERTY_REF_COLUMN):
    # Only contacts with an email and at least one phone number are considered
    has_phone = group["Landline"].notna() | group["Mobile Phone"].notna()
    # Explicit copy, because a helper column is added to the filtered rows below
    filtered = group[group["Email Address"].notna() & has_phone].copy()
    if filtered.empty:
        continue

    # Sort by presence of phone numbers (prioritise those with both)
    filtered["contact_score"] = (
        filtered["Landline"].notna().astype(int) +
        filtered["Mobile Phone"].notna().astype(int)
    )
    filtered = filtered.sort_values("contact_score", ascending=False)
    primary = filtered.iloc[0]

    # Make sure secondary is not the same as primary: compare on the primary's mobile when it has one,
    # otherwise on its landline (the filter above guarantees at least one of the two is present)
    phone_column = "Mobile Phone" if pd.notna(primary["Mobile Phone"]) else "Landline"
    alternatives = filtered[filtered[phone_column] != primary[phone_column]]
    secondary = alternatives.iloc[0] if not alternatives.empty else None

    results.append({
        "Property ID": prop_id,
        "Primary Email": primary["Email Address"],
        "Primary Phone": _preferred_phone(primary),
        "Secondary Email": secondary["Email Address"] if secondary is not None else None,
        "Secondary Phone": _preferred_phone(secondary) if secondary is not None else None,
    })
final_df = pd.DataFrame(results)

# We set up primary and secondary phone numbers. We use mobile as the primary
# We have duplicates, we prioritise entries, by ID, that have an email
# NOTE(review): this replaces the lookup2 built in the first pass, and it sorts on the property reference
# only, so which duplicate is kept depends on the incoming row order - confirm this matches the intent
lookup2 = lookup.sort_values("Property Reference Number (Main Address) (Property)").drop_duplicates(
    "Property Reference Number (Main Address) (Property)", keep="last"
)
# TODO: Get into the standardised asset list format
# TODO: Add the deal postcode to Hubspot
# TODO: Upload the deal postcode

View file

@ -0,0 +1,45 @@
"""
Rebuild the Ealing houses asset list: property features come from the full main sheet, while the inspection
findings (and postcode) come from the re-checked houses sheet
"""
import pandas as pd

EALING_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/"

# Inspection columns that are replaced by the re-checked values
INSPECTION_COLUMNS = [
    'Archetype', 'Construction', 'Insulated', 'Material',
    'CIGA Check Required', 'PV, ACCESS ISSUE, SEE NOTES',
    'OFF GAS - ROOF ORIENTATION', 'Any further surveyor notes',
]

houses_list = pd.read_csv(EALING_DIR + "Ealing BC - HOUSES(UNCHECKED).csv")
features = pd.read_csv(EALING_DIR + "Ealing BC - HOUSES(IGNORE - FULL MAIN).csv")

# Remove the old inspection columns (and the two unnamed columns) from the features
features = features.drop(columns=INSPECTION_COLUMNS + ['Surveyors Name', 'Unnamed: 30', 'Unnamed: 31'])

# NOTE(review): demolitions is loaded but not used below
demolitions = pd.read_excel(
    EALING_DIR + "Ealing - Demolished or due to be.xlsx",
    sheet_name="Demolished or due to be"
)

# In the re-checked sheet the surveyor's name is held under the 'YET TO BE SURVEYED' header
inspections_data = houses_list[
    ["Property ref", "Postcode"] + INSPECTION_COLUMNS + ['YET TO BE SURVEYED']
].rename(columns={"YET TO BE SURVEYED": "Surveyors Name"})

# The inspection columns were already removed from `features` above, so only Postcode is dropped here.
# Dropping the inspection columns a second time raises a KeyError because they no longer exist
asset_list = features.drop(columns=["Postcode"]).merge(
    inspections_data,
    how="inner",
    on="Property ref",
)

asset_list.to_csv(EALING_DIR + "Ealing_rechecked_cleaned_05042025.csv", index=False)

View file

@ -0,0 +1,75 @@
"""
Prepare the Ealing flats programme workbook: carry the survey status from the completion tracker onto the
standardised asset list and write the result out together with the block and geographical sheets
"""
import numpy as np
import pandas as pd
from asset_list.hubspot.config import HubspotProcessStatus

HUBSPOT_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot/"
STANDARDISED_WORKBOOK = HUBSPOT_DIR + "20250707 Ealing Flats - Standardised.xlsx"

project_data = pd.read_excel(
    HUBSPOT_DIR + "Ealing Flats Completion Tracker JW 170625.xlsx",
    sheet_name="All_Flats",
)

# Derive a hubspot status from the tracker: only submitted surveys get one, and it depends on the PAS version
project_data["hubspot_status"] = None
is_submitted = project_data["Status"] == "Submitted"
project_data["hubspot_status"] = np.where(
    is_submitted & (project_data["PAS"] == "PAS2023"),
    HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label,
    project_data["hubspot_status"],
)
project_data["hubspot_status"] = np.where(
    is_submitted & (project_data["PAS"] == "PAS2019"),
    "SURVEYED UNDER 2019 - NEEDS RE-SURVEY",
    project_data["hubspot_status"],
)
project_data["project_code"] = "EALING-FLATS-" + project_data["Block Ref"].astype(str)

asset_list = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="Standardised Asset List")

# All identifiers are compared as strings
asset_list["landlord_property_id"] = asset_list["landlord_property_id"].astype(str)
asset_list["incorrect_landlord_property_id"] = asset_list["incorrect_landlord_property_id"].astype(str)
project_data["Property ref"] = project_data["Property ref"].astype(str)

# We need to update the status of properties that have already been surveyed. Tracker columns that clash
# with asset list columns arrive with a "_project" suffix
tracker_columns = project_data[["Property ref", "hubspot_status", "project_code"]]
asset_list2 = asset_list.merge(
    tracker_columns,
    how="left",
    right_on="Property ref",
    left_on="incorrect_landlord_property_id",
    suffixes=("", "_project"),
)

# Where the tracker has a status it wins over the asset list's own
tracker_status = asset_list2["hubspot_status_project"]
asset_list2["hubspot_status"] = np.where(
    tracker_status.notna(),
    tracker_status,
    asset_list2["hubspot_status"],
)

# A missing project code falls back to the landlord property id
# NOTE(review): this reads the asset list's own project_code; the tracker's project_code_project is dropped
# below without being used - confirm that is intended
existing_code = asset_list2["project_code"]
asset_list2["project_code"] = np.where(
    existing_code.notna(),
    existing_code,
    asset_list2["landlord_property_id"],
)
asset_list2 = asset_list2.drop(columns=["hubspot_status_project", "project_code_project"])

# Properties without a cavity reason are given this default one
asset_list2["cavity_reason"] = np.where(
    asset_list2["cavity_reason"].isna(),
    "Non-Intrusive Data Shows Empty Cavity: SAP Rating 55-68",
    asset_list2["cavity_reason"],
)
asset_list2["solar_reason"] = None

# Read in block analysis and geographical areas from standardised asset list
block_analysis_df = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="Block Analysis")
geographical_areas = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="Geographical Areas")

# Update the new standardised asset list
filename = HUBSPOT_DIR + "20250707 Ealing Flats - Prepared programme.xlsx"
with pd.ExcelWriter(filename) as writer:
    asset_list2.to_excel(writer, sheet_name="Standardised Asset List", index=False)
    block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
    geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)

View file

@ -0,0 +1,116 @@
# Analysis of the MHS "in-fill" property lists against the standardised asset list and the new programme
# sheets: which in-fill properties are in none of the new programmes, and why
import pandas as pd

PROGRAMME_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/New programme/"
PROGRAMME_WORKBOOK = PROGRAMME_DIR + "12052025 MHS Standardised Asset List - programme.xlsx"
IN_FILL_WORKBOOK = PROGRAMME_DIR + "Domna updated programme list May 2025 (1).xlsx"

asset_list = pd.read_excel(PROGRAMME_WORKBOOK, sheet_name="Standardised Asset List")
new_cavity_programme = pd.read_excel(PROGRAMME_WORKBOOK, sheet_name="New Cavity Programme")
new_cavity_pilot = pd.read_excel(PROGRAMME_WORKBOOK, sheet_name="Empty Cavity Pilot")
new_solar_programme = pd.read_excel(PROGRAMME_WORKBOOK, sheet_name="New Solar Programme")

in_fill_properties_houses = pd.read_excel(IN_FILL_WORKBOOK, sheet_name="Houses and Bungalows")
in_fill_properties_flats = pd.read_excel(IN_FILL_WORKBOOK, sheet_name="Flats and Maistonettes")

# Q1) What are these properties? Do we have them on our list already?
# All of the houses are already in the asset list
in_fill_properties_houses["is_in_asset_list"] = in_fill_properties_houses["UPRN"].isin(
    asset_list["landlord_property_id"].values
)
# All of the flats are already in the asset list
in_fill_properties_flats["is_in_asset_list"] = in_fill_properties_flats["UPRN"].isin(
    asset_list["landlord_property_id"].values
)

# Q2) Which properties are excluded from the new programme?
in_fill_properties = pd.concat(
    [in_fill_properties_houses, in_fill_properties_flats], ignore_index=True, sort=False
)
# Merge on the data
in_fill_properties = in_fill_properties.merge(
    asset_list,
    left_on="UPRN",
    right_on="landlord_property_id",
    how="left"
)

# How many properties are in the new programme?
in_fill_properties["in_new_cavity_programme"] = in_fill_properties["UPRN"].isin(
    new_cavity_programme["landlord_property_id"].values
)
in_fill_properties["in_new_solar_programme"] = in_fill_properties["UPRN"].isin(
    new_solar_programme["landlord_property_id"].values
)
in_fill_properties["in_new_cavity_pilot"] = in_fill_properties["UPRN"].isin(
    new_cavity_pilot["landlord_property_id"].values
)

not_in_new_programme = in_fill_properties[
    (~in_fill_properties["in_new_cavity_programme"] & ~in_fill_properties["in_new_solar_programme"] & ~
     in_fill_properties["in_new_cavity_pilot"])
].copy()

# Why? (exploratory counts; their results are not stored or printed by this script)
not_in_new_programme["cavity_reason"].value_counts()
not_in_new_programme["solar_reason"].value_counts()

# Properties with neither a cavity reason nor a solar reason
not_identified_for_anything = not_in_new_programme[
    pd.isnull(not_in_new_programme["cavity_reason"]) &
    pd.isnull(not_in_new_programme["solar_reason"])
]

# Flag the potential re-inspections which is 994 properties though any extractions we need to consider the HA funding
# the extraction
not_in_new_programme["funded_extractions"] = not_in_new_programme["cavity_reason"].isin(
    [
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 69-75",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 55-68",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 76 or more",
        "Non-Intrusive Data Shows Cavity Extraction: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 76 or more",
        "EPC Shows Empty Cavity, inspections show non-cavity build: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 54 or less",
        "EPC Shows Empty Cavity, inspections show retro drilled: SAP Rating 76 or more",
    ]
)

# A property is excluded when it is one of the rows not identified for anything. The flag is built from row
# membership so it is a plain boolean for every row. Previously the subset was tested against itself, which
# only produced a usable result through index alignment and left NaN on every other row
not_in_new_programme["excluded"] = not_in_new_programme.index.isin(not_identified_for_anything.index)

not_in_new_programme[
    not_in_new_programme["funded_extractions"]
].to_csv(
    PROGRAMME_DIR + "funded_extractions.csv",
    index=False
)
not_in_new_programme[
    not_in_new_programme["excluded"]
].to_csv(
    PROGRAMME_DIR + "excluded_properties.csv",
    index=False
)

View file

@ -0,0 +1,14 @@
import pandas as pd

MASTER_WORKBOOK = "/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx"

# Load the cavity (for review) and solar sheets from the standardised master workbook
cavity = pd.read_excel(MASTER_WORKBOOK, sheet_name="Cavity Properties (for review)")
solar = pd.read_excel(MASTER_WORKBOOK, sheet_name="Solar Properties")

# Reduce the cavity sheet to address, postcode and uprn under short column names
short_names = {
    "domna_address_1": "address",
    "domna_postcode": "postcode",
    "epc_os_uprn": "uprn",
}
cavity_al = cavity[list(short_names)].rename(columns=short_names)

View file

@ -0,0 +1,48 @@
"""
July 2025, this script prepares the asset list for Plus Dane
"""
import pandas as pd
# Load the three source lists: the original asset list, the potential-PV (solar) list and the
# newest property list.
oldest_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/PLUS DANE Asset List.xlsx"
)
solar_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane - potential "
    "PV List 04.03.2025.xlsx"
)
newest_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Sava Intelligent Energy "
    "- Property List - March 2025.xlsx"
)

# Rows of the older lists whose UPRN does not appear in the newest list.
# .copy() gives old_missed its own data, so adding a column to it below does not assign into a
# slice of oldest_asset_list (avoids pandas' SettingWithCopyWarning).
old_missed = oldest_asset_list[~oldest_asset_list["UPRN"].isin(newest_asset_list["UPRN"])].copy()
solar_missed = solar_asset_list[~solar_asset_list["UPRN"].isin(newest_asset_list["UPRN"])]  # Empty

# Build new asset list
# NEWEST
# 'UPRN', 'Address', 'Postcode', 'Town', 'EPC SAP Band', 'SAP Rating',
# 'CO₂ Emissions', 'EPC EI Band', 'Data Quality Indicator',
# 'Results Calculated', 'Property Age', 'Property Type', 'Built Form',
# 'Wall Construction', 'Wall Insulation', 'Roof Construction',
# 'Joist Insulation', 'Space Heating System', 'Space Heating Fuel'
#
# SOLAR
# Left-join the solar and oldest lists onto the newest list by UPRN; clashing column names from
# the joined lists get the "_solar" / "_old" suffixes.
df = newest_asset_list.merge(
    solar_asset_list, how="left", on="UPRN", suffixes=("", "_solar"),
).merge(
    oldest_asset_list, how="left", on="UPRN", suffixes=("", "_old")
)

# NOTE(review): "asset_list_versiion" looks like a typo for "asset_list_version"; it is kept as-is
# because it is the column name written to the output workbook - confirm before renaming.
df["asset_list_versiion"] = "July 2025"
old_missed["asset_list_versiion"] = "Historic"

# Append the properties that only exist in the oldest list
df = pd.concat(
    [df, old_missed], ignore_index=True, sort=False
)

# Store excel
df.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/Plus Dane Asset List "
    "July 2025.xlsx",
    index=False,
)

View file

@ -4,7 +4,7 @@ from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
PORTFOLIO_ID = 141
PORTFOLIO_ID = 212
USER_ID = 8
load_dotenv(dotenv_path="backend/.env")
@ -17,25 +17,15 @@ def app():
:return:
"""
asset_list = [
{
"address": "196 Merrow Street",
"postcode": "SE17 2NP",
"uprn": 200003423454,
"patch": True
},
{
"address": "65 Liverpool Grove",
"postcode": "SE17 2HP",
"uprn": 200003423194
},
{
"address": "2 Brettell Street",
"postcode": "SE17 2NZ",
"uprn": 200003423607
},
]
asset_list = pd.DataFrame(asset_list)
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/Energy Information MASTER June 2025 - Standardised.xlsx",
sheet_name="Solar Properties",
)
asset_list = asset_list[~asset_list["estimated"]]
asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
asset_list = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename(
columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"}
)
# Store the asset list in s3
filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
@ -98,14 +88,15 @@ def app():
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "C",
"goal_value": "A",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": patches_filename,
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"valuation_file_path": valuation_filename,
"valuation_file_path": "",
"scenario_name": "Full package remote assessment",
"multi_plan": True,
"budget": None,
"inclusions": ["cavity_wall_insulation", "ventilation"]
}
print(body)

View file

@ -0,0 +1,40 @@
"""
This script will pull in properties, in neighbouring areas, that have been flagged for CWI
"""
import pandas as pd
# Every sheet used below comes from the same reconciled programme workbook
THRIVE_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/Thrive Programme - "
    "reconciled.xlsx"
)

asset_list = pd.read_excel(THRIVE_WORKBOOK, sheet_name="Standardised Asset List")
cavity_areas = pd.read_excel(THRIVE_WORKBOOK, sheet_name="Cavity Areas")
existing_inspections_sheet = pd.read_excel(THRIVE_WORKBOOK, sheet_name="July 2025 Inspections")
empties = pd.read_excel(THRIVE_WORKBOOK, sheet_name="Cavity properties - for review")

# Keep asset-list rows whose postcode is one of the cavity-area postcodes and whose property ID
# is not already listed on the "Cavity properties - for review" sheet
postcode_in_cavity_area = asset_list["domna_postcode"].isin(cavity_areas["domna_postcode"].values)
already_under_review = asset_list["landlord_property_id"].isin(empties["landlord_property_id"].values)
cavity_inspections = asset_list[postcode_in_cavity_area & ~already_under_review]

cavity_inspections.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/July 2025 Inspections/cavity_inspections.csv",
    index=False
)

View file

@ -1,3 +1,4 @@
import random
import time
import pandas as pd
from tqdm import tqdm
@ -27,6 +28,7 @@ class AssetListEpcData:
self.extracted_data = None
self.non_invasive_recommendations = None
self.patches = None
self.epc_data = None
@staticmethod
def check_asset_list(asset_list):
@ -49,7 +51,7 @@ class AssetListEpcData:
"uprn": r.get("uprn"),
"address": r["address"],
"postcode": r["postcode"],
"recommendations": r["recommendations"]
"recommendations": r.get("recommendations")
} for r in self.extracted_data
]
@ -74,7 +76,9 @@ class AssetListEpcData:
# Pull the additional data
extracted_data = []
epc_data = []
for _, home in tqdm(self.asset_list.iterrows(), total=len(self.asset_list)):
add1 = home["address"]
pc = home["postcode"]
# Retrieve the EPC data
@ -92,9 +96,6 @@ class AssetListEpcData:
if epc_searcher.newest_epc is None:
continue
if not pd.isnull(home.get("patch")):
epc_searcher.newest_epc["address1"] = add1
# Attempt both methods:
try:
find_epc_searcher = RetrieveFindMyEpc(
@ -104,16 +105,37 @@ class AssetListEpcData:
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
except Exception as e:
logger.error(f"Error retrieving find my epc data: {e}")
find_epc_searcher = RetrieveFindMyEpc(
address=epc_searcher.newest_epc["address1"],
postcode=epc_searcher.newest_epc["postcode"]
)
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
time.sleep(0.5)
if not pd.isnull(home.get("patch")):
epc_searcher.newest_epc["address1"] = add1
try:
find_epc_searcher = RetrieveFindMyEpc(
address=epc_searcher.newest_epc["address1"],
postcode=epc_searcher.newest_epc["postcode"]
)
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
except Exception as e:
# The f prefix is required so that {e} is interpolated into the log message
logger.error(f"Error retrieving find my epc data with alternative address format: {e}")
find_epc_data = {
"current_epc_rating": epc_searcher.newest_epc["current-energy-rating"],
"current_epc_efficiency": epc_searcher.newest_epc["current-energy-efficiency"],
"potential_epc_rating": None,
"potential_epc_efficiency": None,
"epc_data": {}
}
# Sleep for a random amount of time between 0.5 and 1 seconds to avoid hitting the API rate limit
time.sleep(random.sample(range(50, 100), 1)[0] / 100)
# Every 50 requests, we sleep for 10 seconds to avoid hitting the API rate limit
if len(extracted_data) % 50 == 0 and len(extracted_data) > 0:
logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
time.sleep(10)
# We need uprn
to_append = {
"uprn": home.get("uprn"),
"uprn": home.get("uprn", epc_searcher.newest_epc["uprn"]),
"address": home["address"],
"postcode": home["postcode"],
**find_epc_data,
@ -128,6 +150,8 @@ class AssetListEpcData:
}
extracted_data.append(to_append)
epc_data.append(epc_searcher.newest_epc)
self.extracted_data = extracted_data
self.epc_data = epc_data
logger.info("Data Extrction complete")

View file

@ -1,3 +1,4 @@
import time
import re
import pandas as pd
import requests
@ -55,9 +56,11 @@ class RetrieveFindMyEpc:
results = {}
# 1. Total floor area
results['total-floor-area'] = int(self.get_text(
# We have some instances of very old EPCs where the total floor area is not available
tfa = self.get_text(
soup.find("dt", string="Total floor area").find_next_sibling("dd")
).split(" ")[0])
).split(" ")[0]
results['total-floor-area'] = int(tfa) if tfa != "Not" else None
# Table with features
rows = soup.select("table.govuk-table tbody tr")
@ -125,9 +128,156 @@ class RetrieveFindMyEpc:
return results
def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
def _extract_epc_from_soup(self, soup, epc_certificate, sap_2012_date=None):
# Parse one certificate page (soup) into a flat dict: current/potential rating, bill texts,
# formatted recommendations, EPC data, assessor details and low-carbon energy sources.
# epc_certificate is copied into the result unchanged; sap_2012_date is only passed through to
# self.format_recommendations. Also sets self.walls as a side effect.
# Raises ValueError when one of the expected assessment keys is missing from the page.
#
# The rating summary text is split on "." into the current and the potential rating sentences.
# NOTE(review): the parsing below assumes each sentence ends with the numeric score and has the
# band letter as its sixth-from-last space-separated token - confirm against a live page.
ratings = soup.find('desc', {'id': 'svg-desc'}).text
current_rating = ratings.split(".")[0]
potential_rating = ratings.split(".")[1]
current_sap = int(current_rating.split(' ')[-1])
# Retrieve the energy consumption
bills = soup.find('div', {'id': 'bills-affected'})
bills_list = bills.find_all('li')
if not bills_list:
# If this is the case, it's usually because the EPC was very old. Early EPCs did not have this information
heating_text = None
hot_water_text = None
else:
# NOTE(review): assumes at least two <li> entries (heating first, hot water second)
heating_text = bills_list[0].text
hot_water_text = bills_list[1].text
# Retrieve the recommendations and SAP points
recommendations = []
recommendations_div = soup.find('div', class_='epb-recommended-improvements')
if recommendations_div:
# Find all h3 headers for each step and extract their related information
step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
# Running score/band, used to compute the SAP points gained by each step
previous_sap_score = current_sap
previous_epc = current_rating.split(' ')[-6]
for step_num, step_header in enumerate(step_headers, start=1):
# Extract the step title (the measure)
measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")
# Find the div containing the potential rating within the same section
potential_rating_div = step_header.find_next(
'div', class_='epb-recommended-improvements__potential-rating'
)
# Check if the potential rating div is found
if potential_rating_div:
# Extract the rating text within the SVG text element
extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')
if extracted_rating_text is not None:
rating_text = extracted_rating_text.text.strip()
else:
# No rating text for this step: fall back to the previous score and band
rating_text = " ".join([str(previous_sap_score), previous_epc])
# Parse the rating text to separate the numeric rating and EPC letter
new_rating = int(rating_text.split()[0])
new_epc = rating_text.split()[1]
# Append the information as a dictionary to the recommendations list
recommendations.append({
"step": step_num,
"measure": measure_title,
"new_rating": new_rating,
"new_epc": new_epc,
"sap_points": new_rating - previous_sap_score
})
previous_sap_score = new_rating
previous_epc = new_epc
# Search for the assessment information
assessment_information = soup.find('div', {'id': 'information'})
# Parse this information
rows = assessment_information.find_all('div', class_='govuk-summary-list__row')
# Create a dictionary to hold the parsed information
assessment_data = {}
for row in rows:
key = row.find('dt').text.strip()
if key == "Type of assessment":
# We don't reliably extract this
continue
value_tag = row.find('dd')
# Check if value contains a link (email)
if value_tag.find('a'):
value = value_tag.find('a').text.strip()
elif value_tag.find('summary'):
value = value_tag.find('span').text.strip()
else:
value = value_tag.text.strip()
# These are keys that we have for both the surveyor and the accreditation scheme. Firstly, we'll
# get the surveyor's name and email so we make that information clear
if key in ["Telephone", "Email"]:
# The first occurrence is stored as the assessor's, the second as the accreditation scheme's
if "Assessor's " + key not in assessment_data:
assessment_data["Assessor's " + key] = value
else:
assessment_data["Accreditation Scheme's " + key] = value
continue
assessment_data[key] = value
expected_keys = [
'Assessors name',
"Assessor's Telephone",
"Assessor's Email",
'Assessors ID',
'Accreditation scheme',
'Assessors declaration',
"Accreditation Scheme's Telephone",
"Accreditation Scheme's Email",
'Date of assessment',
'Date of certificate'
]
# Check we have all the expected keys
for key in expected_keys:
if key not in assessment_data:
raise ValueError(f"Missing key: {key}")
# The wall types of the property
property_features_table = soup.find("tbody", class_="govuk-table__body")
property_features_table = property_features_table.find_all("tr")
# Extract wall types
self.walls = []
for row in property_features_table:
cells = row.find_all("td")
# Rows whose header cell reads "Wall" contribute their first data cell
if row.find("th").text.strip() == "Wall":
self.walls.append(cells[0].text.strip())
# Finally, we format the recommendations
recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)
# 4) Low and zero carbon energy sources
low_carbon_energy_sources = self.extract_low_carbon_sources(soup)
# 5) Pull out the EPC data
epc_data = self.extract_epc_data(soup)
# The assessment and low-carbon dicts are merged in last, so their keys sit alongside the fixed ones
resulting_data = {
'epc_certificate': epc_certificate,
'current_epc_rating': current_rating.split(' ')[-6],
'current_epc_efficiency': current_sap,
'potential_epc_rating': potential_rating.split(' ')[-6],
"potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
"heating_text": heating_text,
"hot_water_text": hot_water_text,
"recommendations": recommendations,
"epc_data": epc_data,
**assessment_data,
**low_carbon_energy_sources,
}
return resulting_data
def retrieve_all_find_my_epc_data(self, sap_2012_date=None):
"""
For a post code and address, we pull out all the required data from the find my epc website
This is a quick function to retrieve all the data from the find my epc website for a given postcode and address.
Using this to fulfill a short term need to retrieve all history for a property
:param sap_2012_date:
:return:
"""
postcode_input = self.postcode.replace(" ", "+")
@ -182,6 +332,98 @@ class RetrieveFindMyEpc:
address_response = requests.get(chosen_epc, headers=self.HEADERS)
address_res = BeautifulSoup(address_response.text, features="html.parser")
# We check the section on "Other cerificates for this property and get the url"
# Find the section for other certificates
other_cert_section = address_res.find('div', id='other_certificates_and_reports')
# Extract all certificate number rows (anchor tags within a govuk-summary-list)
other_cert_links = other_cert_section.select('dd.govuk-summary-list__value a')
other_certificates = []
for link in other_cert_links:
cert_number = link.text.strip()
cert_url = link['href'].strip()
other_certificates.append({
"certificate_number": cert_number,
"certificate_url": f"https://find-energy-certificate.service.gov.uk{cert_url}"
})
# Always include the currently selected EPC first
soup_list = [address_res]
# Add additional historic certificates
for link in other_cert_links:
cert_url = f"https://find-energy-certificate.service.gov.uk{link['href'].strip()}"
response = requests.get(cert_url, headers=self.HEADERS)
time.sleep(0.3)
soup_list.append(BeautifulSoup(response.text, features="html.parser"))
all_find_my_epc_data = []
for soup in soup_list:
# Start with the primary one
all_find_my_epc_data.append(self._extract_epc_from_soup(soup, epc_certificate, sap_2012_date))
return all_find_my_epc_data
def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
"""
For a post code and address, we pull out all the required data from the find my epc website
"""
postcode_input = self.postcode.replace(" ", "+")
postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
postcode_response = requests.get(postcode_search, headers=self.HEADERS)
postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
rows = postcode_res.find_all('tr', class_='govuk-table__row')
extracted_table = []
for row in rows:
# Extract the address and URL
address_tag = row.find('a', class_='govuk-link')
if address_tag is None:
continue
extracted_address = None
extracted_address_url = None
if address_tag:
extracted_address = address_tag.text.strip()
extracted_address_url = address_tag['href']
extracted_address_cleaned = (
extracted_address.replace(",", "").replace(" ", "").lower()
)
if not extracted_address_cleaned.startswith(self.address_cleaned):
continue
# If the address is a match, we can extract the data
# Extract the expiry date
expiry_date_tag = row.find('td', class_='govuk-table__cell date')
expiry_date = None
if expiry_date_tag is not None:
expiry_date = expiry_date_tag.parent.find('span').text.strip()
extracted_table.append(
{
"extracted_address": extracted_address,
"extracted_address_url": extracted_address_url,
"expiry_date": datetime.strptime(expiry_date, '%d %B %Y'),
}
)
if not extracted_table:
raise ValueError("No EPC found")
if len(extracted_table) > 1:
# We take the one with the most recent expiry date
extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True)
chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url']
epc_certificate = chosen_epc.split('/')[-1]
address_response = requests.get(chosen_epc, headers=self.HEADERS)
address_res = BeautifulSoup(address_response.text, features="html.parser")
# Key data we want to retrieve:
# 1) Rating
# 2) Bills estimates
@ -195,9 +437,6 @@ class RetrieveFindMyEpc:
potential_rating = ratings.split(".")[1]
current_sap = int(current_rating.split(' ')[-1])
# Floor area
address_res.find()
# Retrieve the energy consumption
bills = address_res.find('div', {'id': 'bills-affected'})
bills_list = bills.find_all('li')
@ -432,6 +671,13 @@ class RetrieveFindMyEpc:
"Condensing boiler (separate from the range cooker)": ["boiler_upgrade"],
"Heating controls (programmer and thermostatic radiator valves)": [
"roomstat_programmer_trvs", "time_temperature_zone_control"
],
'Heating controls (programmer room thermostat and thermostatic radiator valves)': [
"roomstat_programmer_trvs", "time_temperature_zone_control"
],
"Internal wall insulation": ["internal_wall_insulation"],
"High heat retention storage heaters and dual immersion cylinder and dual rate meter": [
"high_heat_retention_storage_heater"
]
}
@ -466,8 +712,13 @@ class RetrieveFindMyEpc:
find_epc_data = searcher.retrieve_newest_find_my_epc_data()
except Exception as e:
logger.error(f"Error retrieving find my epc data: {e}")
if epc["address1"] == epc["address"]:
# There's no benefit of using the same address, so we split on comma
address1 = epc["address"].split(",")[0]
else:
address1 = epc["address1"]
# We attempt with the backup add
searcher = cls(address=epc["address1"], postcode=epc["postcode"])
searcher = cls(address=address1, postcode=epc["postcode"])
find_epc_data = searcher.retrieve_newest_find_my_epc_data()
non_invasive_recommendations = {

View file

@ -194,7 +194,7 @@ class Costs:
IWI_CONTINGENCY = 0.2
# For air source heat pumps, we inflate the assumed cost by quite a bit to account for design and installation
ASHP_CONTINGENCY = 0.35
ASHP_CONTINGENCY = 0.25
# Where there is more uncertainty, a higher contingency rate is used
HIGH_RISK_CONTINGENCY = 0.2
# When there is less uncertainty, a lower contingency rate is used
@ -871,10 +871,10 @@ class Costs:
if needs_cylinder:
# 1000 is the cost of a new hot water cylinder
total_cost = 1200 * number_heated_rooms + 1000
total_cost = 1300 * number_heated_rooms + 1000
else:
# 500 is the cost of a dual immersion heater - a rough estimate
total_cost = 1200 * number_heated_rooms + 500
total_cost = 1300 * number_heated_rooms + 500
subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
vat = total_cost - subtotal_before_vat

View file

@ -57,6 +57,31 @@ class HeatingRecommender:
},
# These are the heating types we need to produce a dual heating recommendation
"dual": None
},
'Electric underfloor heating, electric storage heaters': {
# For this, we would recommend a heat pump
"dual": None
},
"Room heaters, electric, boiler and radiators, mains gas": {
"hhr": {
"mainheating_description": "Electric storage heaters, radiators",
"recommendation_description": "Install high heat retention electric storage heaters.",
"controls_prefix": ""
},
"boiler": {
"mainheating_description": "Boiler and radiators, mains gas",
"recommendation_description": "Upgrade to a new condensing boiler.",
"controls_suffix": ""
},
"dual": None
},
"Room heaters, electric, electric storage heaters": {
"hhr": {
"mainheating_description": "Electric storage heaters, radiators",
"recommendation_description": "Install high heat retention electric storage heaters.",
"controls_prefix": ""
},
"dual": None
}
}
@ -109,6 +134,10 @@ class HeatingRecommender:
hhr_suitable = no_mains or self.has_electric_heating_description or self.has_room_heaters
hhr_suitable = hhr_suitable and (
"underfloor heating" not in self.property.main_heating["clean_description"]
)
return (
hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and
("high_heat_retention_storage_heater" in measures)
@ -165,7 +194,8 @@ class HeatingRecommender:
) and
(not ashp_only_heating_recommendation) and
("boiler_upgrade" in measures) and
(not self.has_ashp)
(not self.has_ashp) and
(not self.property.main_heating["has_warm_air"])
)
return is_valid, has_gas_boiler
@ -487,17 +517,30 @@ class HeatingRecommender:
]
# This is a map from the heating controls description to the description of the air source heat pump set up
ashp_descriptions = {
"Time and temperature zone control": (
f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, "
"room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 or "
"24 hour tariff"
),
"Programmer, TRVs and bypass": (
f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure you "
"have an 18 or 24 hour tariff"
),
}
if ashp_size is None:
ashp_descriptions = {
"Time and temperature zone control": (
f"Install two cascaded air source heat pumps, and upgrade heating controls to Smart Thermostats, "
"room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 "
"or "
"24 hour tariff"
)
}
else:
ashp_descriptions = {
"Time and temperature zone control": (
f"Install a {ashp_size}KW air source heat pump, and upgrade heating controls to Smart Thermostats, "
"room sensors and smart radiator valves (time & temperature zone control). Ensure you have an 18 "
"or "
"24 hour tariff"
),
"Programmer, TRVs and bypass": (
f"Install a {ashp_size}KW air source heat pump, with programmer, TRVs and a Bypass valve. Ensure "
f"you "
"have an 18 or 24 hour tariff"
),
}
new_heating_description = "Air source heat pump, radiators, electric"
new_hot_water_description = "From main system"
@ -924,6 +967,7 @@ class HeatingRecommender:
return recommendations
self.heating_recommendations.extend(recommendations)
return None
@staticmethod
def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):

View file

@ -679,7 +679,7 @@ class Recommendations:
# Handle the case of community schemes
if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"):
if main_fuel_description == "mains gas (community)":
if main_fuel_description in ["mains gas (community)", "UNKNOWN"]:
return {
"heating_fuel_type": "Natural Gas (Community Scheme)",
"hotwater_fuel_type": "Natural Gas (Community Scheme)",

View file

@ -191,11 +191,22 @@ class RoofRecommendations:
non_invasive_recommendations = self.property.non_invasive_recommendations
# We check a specific condition - which will imply loft insulation isn't appropriate but room in roof
# insulation is
# 1) We have an uninsulated loft (assumed)
# 2) We have a non-intrusive recommendation for room in roof insulation
rir_over_loft = (
self.property.roof["is_pitched"] and
self.property.roof["insulation_thickness"] == "none" and
"room_in_roof_insulation" in [x["type"] for x in non_invasive_recommendations]
)
# We firstly handle non-intrusive recommendations, which may override the normal roof insulation recommendations
if ("loft_insulation" in [x["type"] for x in non_invasive_recommendations]) or (
self.property.roof["is_pitched"] and "loft_insulation" in measures and
not self.property.roof["is_at_rafters"]
):
) and not rir_over_loft:
self.recommend_roof_insulation(
u_value=u_value,
insulation_thickness=self.insulation_thickness,
@ -223,7 +234,8 @@ class RoofRecommendations:
# There are cases where the property might have a room roof as the second roof, but we have a recommendation for
# it, so we allow this override
if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures) or (
"room_roof_insulation" in [x["type"] for x in non_invasive_recommendations]
"room_roof_insulation" in [x["type"] for x in non_invasive_recommendations] or
rir_over_loft
):
self.recommend_room_roof_insulation(u_value, phase, default_u_values)
return
@ -502,7 +514,7 @@ class RoofRecommendations:
# and the cost of the materials
rir_non_invasive_recommendation = next(
(x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {}
(x for x in self.property.non_invasive_recommendations if x["type"] == "room_in_roof_insulation"), {}
)
insulation_materials = pd.DataFrame(self.room_roof_insulation_materials)

View file

@ -10,11 +10,6 @@ class VentilationRecommendations(Definitions):
crucial for prevent overheating risks in warmer months
"""
VENTILATION_DESCRIPTIONS = [
'mechanical, extract only',
'mechanical, supply and extract'
]
def __init__(
self,
property_instance: Property,
@ -26,9 +21,6 @@ class VentilationRecommendations(Definitions):
self.recommendation = None
self.materials = [part for part in materials if part["type"] == "mechanical_ventilation"]
def identify_ventilation(self):
self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS
def recommend(self, phase):
"""
If there is no ventilation, we recommend installing ventilation
@ -38,8 +30,8 @@ class VentilationRecommendations(Definitions):
:return:
"""
self.identify_ventilation()
if self.has_ventilaion:
self.property.identify_ventilation()
if self.property.has_ventilation:
return
if len(self.materials) != 1:

View file

@ -47,19 +47,19 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation):
# We bundle the impact of ventilation with the measure
total = (
rec["total"] + ventilation_recommendation["total"]
if rec["type"] in assumptions.measures_needing_ventilation
if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation
else rec["total"]
)
gain = (
rec[goal_key] + ventilation_recommendation[goal_key]
if rec["type"] in assumptions.measures_needing_ventilation
if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation
else rec[goal_key]
)
rec_type = (
"+".join(
[rec["type"], ventilation_recommendation["type"]]
) if rec["type"] in assumptions.measures_needing_ventilation
) if rec["type"] in assumptions.measures_needing_ventilation and needs_ventilation
else rec["type"]
)

View file

@ -0,0 +1,71 @@
"""
This is a script for preparing a sample for testing the end to end process, so that when Spring send us
data, we know it will work.
"""
import pandas as pd
from utils.s3 import read_csv_from_s3
birmingham_epcs = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/domestic-E08000025-Birmingham/certificates.csv"
)

# Keep only the newest EPC per UPRN: sort newest-first within each UPRN, then drop the later rows
birmingham_epcs['LODGEMENT_DATE'] = pd.to_datetime(birmingham_epcs['LODGEMENT_DATE'])
newest_first = birmingham_epcs.sort_values(by=['UPRN', 'LODGEMENT_DATE'], ascending=[True, False])
birmingham_epcs = newest_first.drop_duplicates(subset='UPRN')

# The postal region is the part of the postcode before the space
birmingham_epcs["postal_region"] = birmingham_epcs["POSTCODE"].str.split(" ").str[0]

# Addressable market: privately rented houses/bungalows rated D-G with an EPC lodged since 2020
is_house_or_bungalow = birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])
rated_d_or_below = birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E', 'D'])
lodged_since_2020 = birmingham_epcs['LODGEMENT_DATE'] >= '2020-01-01'
privately_rented = birmingham_epcs['TENURE'].isin(['rental (private)', 'Rented (private)'])
addressable_market = birmingham_epcs[
    rated_d_or_below & lodged_since_2020 & is_house_or_bungalow & privately_rented
]

# We take the Spring portfolio and remove the properties in their sample
asset_list = pd.DataFrame(
    read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
)
asset_list["postal_region"] = asset_list["postcode"].str.split(" ").str[0]

market_uprns = addressable_market["UPRN"].astype(int).astype(str)
addressable_market = addressable_market[~market_uprns.isin(asset_list["uprn"].values)]

# ...and keep only the postal regions that the Spring portfolio covers
spring_regions = asset_list["postal_region"].unique()
addressable_market = addressable_market[addressable_market["postal_region"].isin(spring_regions)]

# Take a sample of properties, EPC F or G, EPC lodged in 2025. We focus on houses/bungalows
rated_f_or_g = birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G'])
lodged_in_2025 = birmingham_epcs['LODGEMENT_DATE'] >= '2025-01-01'

# Prepare the sample, with just the columns we would expect to receive from Spring
# 1) UPRN
# 2) Address
# 3) Postcode
# 4) Property type
# 5) Built form
# 6) Number of bedrooms (we'll simulate this)
# 7) Number of bathrooms (we'll simulate this)
# 8) Valuation (We'll simulate this, around 200,000)
sample = birmingham_epcs.loc[
    rated_f_or_g & lodged_in_2025 & is_house_or_bungalow,
    ['UPRN', 'ADDRESS', 'POSTCODE', 'PROPERTY_TYPE', 'BUILT_FORM'],
].copy()
sample = sample.assign(
    BEDROOMS=3,  # Simulating number of bedrooms
    BATHROOMS=1,  # Simulating number of bathrooms
    VALUATION=200000,  # Simulating valuation
)
sample = sample.rename(columns=str.lower)

# Store this as an excel file
sample.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/birmingham_sample.xlsx",
    index=False
)

View file

@ -0,0 +1,124 @@
"""
This script prepares the data for the principal pitch modelling
"""
import os
import pandas as pd
from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
PORTFOLIO_ID = 206
USER_ID = 8
EPC_TARGET = "C"
# Read the input file
properties = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Birmingham_price_top300.xlsx"
)

# Keep just the D's and below
properties = properties[properties["current_energy_rating"].isin(["D", "E", "F", "G"])].copy()

# Focus on houses: drop flats according to either property-type column
properties = properties[properties["property_type_std"] != "Flat"]
properties = properties[properties["property_type"] != "flat"]

# Rename the key columns
properties = properties.rename(
    columns={
        "address1": "address",
        "number_of_bathrooms": "n_bathrooms",
        "num_beds": "n_bedrooms"
    }
)
properties["patch"] = True

# Pull the non-invasive recommendations
asset_list_epc_client = AssetListEpcData(
    asset_list=properties,
    epc_auth_token=EPC_AUTH_TOKEN
)
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()
asset_list_epc_client.get_patch()

# extracted_data holds the find-my-epc results per property; epc_data holds the matching
# newest_epc records (hyphenated keys such as "current-energy-efficiency").
extracted_df = pd.DataFrame(asset_list_epc_client.extracted_data)
epc_df = pd.DataFrame(asset_list_epc_client.epc_data)

# Find examples where patches are different to the api
compare_epc = []
# `or []` keeps the loop safe when no patches were produced (None or empty)
for patch in asset_list_epc_client.patches or []:
    extracted = extracted_df[extracted_df["uprn"] == patch["uprn"]].squeeze()
    epc = epc_df[epc_df["uprn"] == patch["uprn"]].squeeze()
    compare_epc.append(
        {
            "uprn": extracted["uprn"],
            "address": extracted["address"],
            "postcode": extracted["postcode"],
            # api_epc comes from the EPC record, fme_epc from the find-my-epc extraction
            # (the two labels were previously swapped)
            "api_epc": int(epc["current-energy-efficiency"]),
            "fme_epc": int(extracted["current_epc_efficiency"]),
        }
    )

# Explicit columns keep the comparison below valid even when there are no patches
compare_epc = pd.DataFrame(
    compare_epc, columns=["uprn", "address", "postcode", "api_epc", "fme_epc"]
)
diff = compare_epc[compare_epc["api_epc"] != compare_epc["fme_epc"]]

# Compare matched addresses to make sure they are the same
compare_addresses = extracted_df[["address", "postcode", "uprn"]].merge(
    epc_df[["uprn", "address1", "postcode"]].rename(columns={"address1": "epc_address1", "postcode": "epc_postcode"}),
    how="left",
    on=["uprn"]
)

# Add on uprn
properties = properties.merge(
    extracted_df[["address", "postcode", "uprn"]],
    how="left",
    on=["address", "postcode"]
)

# Store the asset list in s3
filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
save_csv_to_s3(
    dataframe=properties,
    bucket_name="retrofit-plan-inputs-dev",
    file_name=filename
)

# Store non-invasive recommendations in S3
non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
save_csv_to_s3(
    dataframe=pd.DataFrame(asset_list_epc_client.non_invasive_recommendations),
    bucket_name="retrofit-plan-inputs-dev",
    file_name=non_invasive_recommendations_filename
)

# Store patches in S3 (the path stays empty when there are no patches)
patches_filename = ""
if asset_list_epc_client.patches:
    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(asset_list_epc_client.patches),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=patches_filename
    )

# Request body for the plan run; printed so it can be copied into the request
body = {
    "portfolio_id": str(PORTFOLIO_ID),
    "housing_type": "Private",
    "goal": "Increasing EPC",
    # Use the module-level target instead of repeating the literal
    "goal_value": EPC_TARGET,
    "trigger_file_path": filename,
    "already_installed_file_path": "",
    "patches_file_path": patches_filename,
    "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
    "valuation_file_path": "",
    "scenario_name": "EPC C",
    "multi_plan": True,
    "budget": None,
    "ashp_cop": 3.5,
    # This is new - when optimising, we drop scores by a few points to account for SAP 10
    "simulate_sap_10": True,
    "exclusions": ["external_wall_insulation"],
    "required_measures": ["cavity_wall_insulation", "loft_insulation"]
}
print(body)

View file

@ -0,0 +1,224 @@
"""
This script prepares the data for the financial model
"""
import pandas as pd
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
# Portfolio whose properties are exported; passed to get_data as portfolio_id
PORTFOLIO_ID = 206
# Scenarios whose plans and recommendations are exported; passed to get_data as scenario_ids
SCENARIOS = [389]
def _model_columns_as_dict(instance, model):
    """Return a dict mapping every table column name of `model` to the value held on `instance`."""
    return {col.name: getattr(instance, col.name) for col in model.__table__.columns}


def get_data(portfolio_id, scenario_ids):
    """
    Load the properties, plans and default recommendations used to build the financial model inputs.

    :param portfolio_id: portfolio whose properties (joined to their EPC details) are fetched
    :param scenario_ids: scenario IDs used to select the plans; recommendations are the default ones
        attached to those plans
    :return: tuple (properties_data, plans_data, recommendations_data). Each element is a list of dicts
        keyed by column name; every recommendation dict also carries a "Scenario ID" entry.
    """
    session = sessionmaker(bind=db_engine)()
    # The session is closed in a finally block so that a failing query does not leave it open
    try:
        session.begin()

        # Properties of the portfolio together with their EPC details
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id
        ).all()
        # Flatten both models into one dict per property. Where the two tables share a column name,
        # the EPC details value overwrites the property value.
        properties_data = [
            {
                **_model_columns_as_dict(property_row, PropertyModel),
                **_model_columns_as_dict(epc_details_row, PropertyDetailsEpcModel),
            }
            for property_row, epc_details_row in properties_query
        ]

        # Plans are selected by scenario only; they are not restricted to the portfolio here
        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
        plans_data = [_model_columns_as_dict(plan, Plan) for plan in plans_query]

        # Plan IDs drive the recommendation lookup through PlanRecommendations
        plan_ids = [plan["id"] for plan in plans_data]

        # Default recommendations attached to those plans, each paired with its plan's scenario_id
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan, Plan.id == PlanRecommendations.plan_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default == True  # noqa: E712 - builds a SQL comparison, not a Python one
        ).all()
        # Every result row is a (Recommendation, scenario_id) pair, so it can be unpacked directly
        recommendations_data = [
            {**_model_columns_as_dict(recommendation, Recommendation), "Scenario ID": scenario_id}
            for recommendation, scenario_id in recommendations_query
        ]
    finally:
        session.close()

    return properties_data, plans_data, recommendations_data
# Fetch the records for the configured portfolio and scenarios, one frame per record type
properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
recommendations_df = pd.DataFrame(recommendations_data)

# Only rows flagged as default are used, both for costs and for SAP points
is_default = recommendations_df["default"]

# Cost of each default measure, one row per (property, measure)
recommended_measures_df = recommendations_df.loc[is_default, ["property_id", "measure_type", "estimated_cost"]]

# Total SAP points of the default measures for each property
post_install_sap = (
    recommendations_df.loc[is_default, ["property_id", "sap_points"]]
    .groupby("property_id")[["sap_points"]]
    .sum()
    .reset_index()
)

# Reshape to one row per property and one cost column per measure type
recommendations_measures_pivot = recommended_measures_df.pivot(
    index='property_id',
    columns='measure_type',
    values='estimated_cost'
).reset_index()

# The total retrofit cost is the sum across the measure columns (everything except property_id)
measure_costs = recommendations_measures_pivot.drop(columns=["property_id"])
recommendations_measures_pivot["total_retrofit_cost"] = measure_costs.sum(axis=1)

# Property attributes carried into the output
property_columns = [
    "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
    "current_epc_rating",
    "current_sap_points", "total_floor_area", "number_of_rooms",
]
df = properties_df[property_columns]
# Left merges keep properties that have no default recommendations
df = df.merge(recommendations_measures_pivot, how="left", on="property_id")
df = df.merge(post_install_sap, how="left", on="property_id")
df = df.drop(columns=["property_id"])

# A property without default recommendations gains no SAP points
df["sap_points"] = df["sap_points"].fillna(0)
df["predicted_post_works_sap"] = (df["current_sap_points"] + df["sap_points"]).round()
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc)
# The results are merged back onto the main asset list, which will contain the bathrooms
from utils.s3 import read_csv_from_s3

# NOTE(review): the S3 key hard-codes "8/206" - confirm it should follow PORTFOLIO_ID
asset_list = pd.DataFrame(
    read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
)

# Cast the join key to text before merging - presumably the asset list holds uprn as strings; verify
df["uprn"] = df["uprn"].astype(str)

# These columns are dropped from df before the merge, so they are not brought across twice
columns_left_off_merge = ["address", "postcode", "property_type", "total_floor_area"]
asset_list = pd.merge(
    asset_list,
    df.drop(columns=columns_left_off_merge),
    how="left",
    on="uprn",
)

# Condition cost table: the header row sits at row offset 35 of the "Prices - Khalim" sheet
condition_costs = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
    sheet_name="Prices - Khalim",
    header=35
)
# Keep only the named columns and renumber the rows from zero
is_named_column = ~condition_costs.columns.str.contains('^Unnamed')
condition_costs = condition_costs.loc[:, is_named_column].reset_index(drop=True)
# We now estimate condition cost
def simulate_condition(asset_list, condition_costs):
    """
    Simulate the condition cost of every property at each condition score from 1 to 10.

    This function is for testing: it shows what the costing array looks like across all scores.
    For a given score, every cost in the matching row of `condition_costs` is scaled by the
    property's floor area, and the "Bathroom" cost is additionally multiplied by the number of
    bathrooms. The property's cost at that score is the sum of the scaled costs.

    :param asset_list: DataFrame with one row per property. Must contain "uprn", "total_floor_area"
        and a bathroom count in "n_bathrooms" (the older "bathrooms" column is used when
        "n_bathrooms" is absent). Numeric values may be given as text.
    :param condition_costs: DataFrame with a "Condition" column covering scores 1-10, a "Bathroom"
        column and any further cost columns
    :return: asset_list left-merged on "uprn" with the added columns "Condition 10" ... "Condition 1"
    """
    # The live costing loop reads "n_bathrooms"; fall back to "bathrooms" for older inputs
    bathrooms_column = "n_bathrooms" if "n_bathrooms" in asset_list.columns else "bathrooms"

    # The cost row depends only on the score, so it is looked up once rather than once per property
    rates_by_condition = {
        condition: condition_costs[
            condition_costs["Condition"] == condition
        ].drop(columns=["Condition"]).iloc[0]
        for condition in reversed(range(1, 11))
    }

    condition_rows = []
    for _, row in asset_list.iterrows():
        # Values read from a CSV arrive as text, so coerce them before doing arithmetic
        floor_area = float(row["total_floor_area"])
        n_bathrooms = float(row[bathrooms_column])
        conditions = {}
        for condition, rates in rates_by_condition.items():
            # Each cost is scaled by floor area; the multiplication yields a fresh Series,
            # so the cached rates are never modified
            condition_cost = rates * floor_area
            condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms
            conditions["Condition " + str(condition)] = condition_cost.sum()
        condition_rows.append({"uprn": row["uprn"], **conditions})

    condition_df = pd.DataFrame(condition_rows)
    return asset_list.merge(condition_df, how="left", on="uprn")
# To inspect the cost at every condition score instead, enable the simulation below:
# asset_list = simulate_condition(asset_list, condition_costs)

# We calculate the condition cost based on the condition score of each property.
# Create the output column first so it exists even when every property is skipped;
# a column that is already present is left alone.
if "domna_condition_cost" not in asset_list.columns:
    asset_list["domna_condition_cost"] = float("nan")

for row_index, row in asset_list.iterrows():
    condition = row["condition_score"]
    # Skip properties without a score: None, NaN, or blank text
    if pd.isna(condition) or (isinstance(condition, str) and not condition.strip()):
        continue
    condition = int(float(condition))
    condition_cost = condition_costs[
        condition_costs["Condition"] == condition
    ].drop(columns=["Condition"]).iloc[0]
    # Each cost is scaled by floor area
    condition_cost = condition_cost * float(row["total_floor_area"])
    # The bathroom cost is additionally multiplied by the number of bathrooms
    n_bathrooms = row["n_bathrooms"]
    condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)
    total_condition_cost = condition_cost.sum()
    # Write back by row label: no full-column uprn scan per row, and the value lands on exactly
    # this row even when the uprn is missing or duplicated
    asset_list.loc[row_index, "domna_condition_cost"] = total_condition_cost

# Store output
asset_list.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
    index=False
)

# Side-by-side view of the supplied decoration sums and the calculated cost
# NOTE(review): "decoration_sum_min " has a trailing space - confirm it matches the source column name
condition_cost_comparison = asset_list[
    ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
]