putting together outputs

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-04 14:04:20 +00:00
parent 10bc433283
commit 139db23592
3 changed files with 360 additions and 81 deletions

View file

@ -1,5 +1,6 @@
import os
from pyexpat import features
from urllib import parse
from fuzzywuzzy import fuzz
import PyPDF2
import re
@ -2936,6 +2937,14 @@ def identify_incorrect_packages():
)
def extract_sharepoint_url(x):
    """
    Pull the last two path segments out of a Sharepoint link cell.

    The cell is expected to hold text with the link embedded after a " - http" separator.
    Everything after the first separator (up to any second one) is parsed as a URL, "%20" is
    turned back into a space, and the final two "/"-separated parts of the path are returned
    joined by "/".

    :param x: The cell value; may be null
    :return: "<second to last segment>/<last segment>" of the link path, or "" when the value
        is null or contains no " - http" separator
    """
    if pd.isnull(x):
        return ""

    pieces = x.split(" - http")
    if len(pieces) < 2:
        # No embedded link in this cell - treat it the same way as a missing value rather
        # than raising an IndexError part way through a DataFrame.apply
        return ""

    # The split removes the "http" prefix itself, but urlparse still isolates the path portion
    url_path = parse.urlparse(pieces[1]).path.replace("%20", " ")
    return "/".join(url_path.split("/")[-2:])
def revised_model():
"""
This function implements the revised model for Stonewater, where we are looking at new priority postcodes
@ -2956,6 +2965,7 @@ def revised_model():
original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
original_archetypes["UPRN"] = original_archetypes["UPRN"].astype("Int64").astype(str)
# Check if we have all of the addresses
missed = original_archetypes[
@ -2965,7 +2975,7 @@ def revised_model():
assert set(missed) == {'NOT PRIORITY POSTCODE', 'IN WAVE 2.1', 'EPC C OR ABOVE'}
original_archetypes = original_archetypes[
["Address ID", "Archetype ID", "Archetype Group Rank"]
["Address ID", "Archetype ID", "Archetype Group Rank", "UPRN"]
]
# Merge these archetypes on to the new priority postcodes
@ -3104,6 +3114,42 @@ def revised_model():
# Replace \n with ""
retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "")
retrofit_assessments_data_columns = [
'Current SAP Rating', 'Current EPC Band', 'Primary Energy Use (kWh/yr)',
'Primary Energy Use Intensity (kWh/m2/yr)', 'Number of Storeys',
'Fuel Bill', 'Window Age Description',
'Window Age Description Proportion (%)',
'Secondary Window Age Description',
'Secondary Window Age Description Proportion (%)', 'Number of Windows',
'Total Number of Doors', 'Number of Insulated Doors',
'Existing Primary Heating System',
'Existing Primary Heating PCDF Reference',
'Existing Primary Heating Controls',
'Existing Primary Heating % of Heat',
'Existing Secondary Heating System',
'Existing Secondary Heating PCDF Reference',
'Existing Secondary Heating Controls',
'Existing Secondary Heating % of Heat', 'Secondary Heating Code',
'Water Heating Code', 'Total Floor Area (m2)',
'Total Ground Floor Area (m2)', 'RIR Floor Area',
'Main Building Wall Area (m2)', 'First Extension Wall Area (m2)',
'Number of Light Fittings', 'Number of LEL Fittings',
'Number of fittings needing LEL', 'Main Roof Type',
'Main Roof Insulation', 'Main Roof Insulation Thickness',
'Main Wall Type', 'Main Wall Insulation', 'Main Wall Dry-lining',
'Main Wall Thickness', 'Main Building Alternative Wall Type',
'Main Building Alternative Wall Insulation',
'Main Building Alternative Wall Dry-lining',
'Main Building Alternative Wall Thickness', 'Main Fuel'
]
# For the columns in retrofit_assessments_data_columns, prefix all of them with Survey:
retrofit_assessments_data_columns_prefixed = ["Survey: " + x for x in retrofit_assessments_data_columns]
rename_dict = dict(zip(retrofit_assessments_data_columns, retrofit_assessments_data_columns_prefixed))
retrofit_assessment_data = retrofit_assessment_data.rename(columns=rename_dict)
retrofit_assessment_data["Survey: Current EPC Band"] = (
retrofit_assessment_data["Survey: Current SAP Rating"].apply(lambda x: sap_to_epc(x))
)
# We can read in the data as needed
# Next Step: Read in the coordinated measures and match to the extracted data
@ -3134,14 +3180,6 @@ def revised_model():
ccs_coordination_sheet = ccs_coordination_sheet.head(87)
ccs_coordination = pd.concat([ccs_coordination_removed_from_programme, ccs_coordination_sheet])
from urllib import parse
def extract_sharepoint_url(x):
    """
    Return the final two path segments of the link held in a Sharepoint link cell.

    The text after the first " - http" separator is parsed as a URL; "%20" in its path is
    replaced with a space and the last two "/"-separated segments are joined with "/".

    :param x: The cell value; may be null
    :return: The two-segment folder path, or "" when the value is null or has no " - http"
        separator
    """
    if pd.isnull(x):
        return ""

    segments = x.split(" - http")
    if len(segments) < 2:
        # A cell without an embedded link has no folder path; return "" instead of raising
        # an IndexError
        return ""

    link_path = parse.urlparse(segments[1]).path.replace("%20", " ")
    return "/".join(link_path.split("/")[-2:])
ccs_coordination["folder_path"] = ccs_coordination["Sharepoint Link"].apply(lambda x: extract_sharepoint_url(x))
############################################################
@ -3224,8 +3262,6 @@ def revised_model():
lambda x: extract_sharepoint_url(x)
)
# Combine the data back
############################################################
# NEW 450 COORDINATED RETROFIT ASSESSMENTS
#############################################################
@ -3352,7 +3388,6 @@ def revised_model():
)
ccs_coordination = ccs_coordination[~pd.isnull(ccs_coordination["Postcode"])]
ccs_coordination = ccs_coordination[ccs_coordination["Retrofit Assessment"] != "Outstanding"]
from fuzzywuzzy import fuzz
ccs_manual_filters = {
"35 Kittiwake Close": "Wave 2.1 Surveys/11. CCS Dorset/Kittiwake Close 35"
@ -3596,6 +3631,17 @@ def revised_model():
matching_lookup, how="left", on="Name"
)
# We now map the retrofit assessment data to the coordinated packages
wates_coordination = wates_coordination.merge(
retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
)
ccs_coordination = ccs_coordination.merge(
retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
)
retrofit_packages_board = retrofit_packages_board.merge(
retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
)
# We have 4 properties in the Wates coordination board, that we want to remove from the retrofit packages board
to_remove = wates_coordination[
wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"])
@ -3617,8 +3663,8 @@ def revised_model():
'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
'Solar PV', 'Other measures', 'Organisation Reference',
]
],
] + retrofit_assessments_data_columns_prefixed
],
ccs_coordination[
[
# We don't have secondary wall insulation, Flat Roof, RIR, Heating Controls,
@ -3627,8 +3673,8 @@ def revised_model():
'SAP Band Install Package', 'Package Approved (Client)',
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
'Ventilation', 'Heating', 'Other Measures', "Asset ID.1_y",
]
].rename(
] + retrofit_assessments_data_columns_prefixed
].rename(
columns={
"SAP Band Pre": "Actual SAP Band",
"SAP Rating Pre": "Actual SAP Rating",
@ -3651,8 +3697,8 @@ def revised_model():
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x'
]
].rename(
] + retrofit_assessments_data_columns_prefixed
].rename(
columns={
"SAP Band Pre": "Actual SAP Band",
"SAP Rating Pre": "Actual SAP Rating",
@ -3681,24 +3727,8 @@ def revised_model():
on="Organisation Reference"
)
# We match the properties to their closest match
# We clean up the SAP ratings in the coordinated packages
def sap_to_number(x):
    """
    Convert a SAP rating value to an integer score.

    Values that int() accepts directly are returned as-is. Otherwise the value is treated as
    text carrying a single band letter (A-G) either at the end ("69C") or at the start
    ("C69"), and the remainder is converted to an integer.

    :param x: The raw rating, e.g. 69, "69", "69C" or "C69"
    :return: The integer score, or None when no score can be extracted
    """
    try:
        return int(x)
    except (TypeError, ValueError):
        # Not directly numeric - fall through and look for a band letter
        pass

    if not isinstance(x, str):
        return None

    text = x.strip()
    if not text:
        return None

    # Band "G" is included so the lowest-rated homes are converted too
    bands = ("A", "B", "C", "D", "E", "F", "G")
    if text[-1].upper() in bands:
        remainder = text[:-1]
    elif text[0].upper() in bands:
        remainder = text[1:]
    else:
        return None

    try:
        return int(remainder)
    except ValueError:
        return None
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Band"])]
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Rating"])]
coordinated_packages["Actual SAP Rating"] = coordinated_packages["Actual SAP Rating"].apply(
lambda x: sap_to_number(x)
)
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current EPC Band"])]
coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current SAP Rating"])]
# We need the features pertaining to these priority postcodes
@ -3721,6 +3751,11 @@ def revised_model():
if not match.empty:
return match
# Finally, we search for a property in the same Archetype
match = coordinated_packages[coordinated_packages["Archetype ID"] == home["Archetype ID"]]
if not match.empty:
return match
return None # No match found
coordinated_packages["Postal Region"] = coordinated_packages["Postcode"].str.split(" ").str[0].str.strip()
@ -3732,6 +3767,12 @@ def revised_model():
coordinated_packages["Primary Property Type"] = coordinated_packages["Property Type"].str.split(":").str[0]
new_priority_postcodes["Primary Property Type"] = new_priority_postcodes["Property Type"].str.split(":").str[0]
coordinated_packages = coordinated_packages.merge(
new_priority_postcodes[["Organisation Reference", "Archetype ID"]],
how="left",
on="Organisation Reference"
)
# For every property in the priority postcodes data, we look for a most appropriate matching property
no_match = []
matches = []
@ -3759,16 +3800,17 @@ def revised_model():
no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False)
# len(no_match)
# 8764, 5607, 5646
# 8764, 5607, 5646, 5071
# no_match_summary.shape
# (3953, 6), (2948, 6), (2969, 7)
# (3953, 6), (2948, 6), (2969, 7), (2575, 7)
matches_df = pd.DataFrame(matches)
matches_df = matches_df.merge(
coordinated_packages[["Organisation Reference", "Actual SAP Band", "Actual SAP Rating"]],
coordinated_packages[["Organisation Reference", "Survey: Current EPC Band", "Survey: Current SAP Rating"]],
left_on="Best Match Organisation Reference", right_on="Organisation Reference",
suffixes=("", " - Closest Match")
)
# We want to aggregate the matches, when we have multiple
aggregated_matches_df = []
for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
@ -3778,19 +3820,21 @@ def revised_model():
"Organisation Reference": org_ref,
"Number of matches": 1,
"Proportion": 100,
"Estimated SAP Rating": mapped_matches["Actual SAP Rating"].values[0],
"Estimated EPC Rating": sap_to_epc(mapped_matches["Actual SAP Rating"].values[0])
"Estimated SAP Rating": mapped_matches["Survey: Current SAP Rating"].values[0],
"Estimated EPC Rating": mapped_matches["Survey: Current EPC Band"].values[0]
}
)
continue
# We need to aggregate the matches, since we have multiple
average_rating = mapped_matches["Actual SAP Rating"].mean()
average_rating = mapped_matches["Survey: Current SAP Rating"].mean()
number_of_matches = mapped_matches.shape[0]
average_epc_rating = sap_to_epc(average_rating)
# proportion is the number of properties that have this EPC rating
proportion_with_this_epc = int(
mapped_matches[mapped_matches["Actual SAP Band"] == average_epc_rating].shape[0] / number_of_matches * 100)
mapped_matches[mapped_matches["Survey: Current EPC Band"] == average_epc_rating].shape[
0] / number_of_matches * 100
)
aggregated_matches_df.append(
{
"Organisation Reference": org_ref,
@ -3804,12 +3848,220 @@ def revised_model():
aggregated_matches_df = pd.DataFrame(aggregated_matches_df)
mapped_priority_list = new_priority_postcodes.merge(
matches_df, on="Organisation Reference",
aggregated_matches_df, on="Organisation Reference", how="left"
)
# We merge on the EPC ratings for the matched properties
mapped_priority_list = mapped_priority_list.merge(
mapped_priority_list["address1"] = mapped_priority_list["Address"].str.split(",").str[0]
# If we have a leading number like 01, 02, 03, 04, 05, 06, 07, 08, 09, we remove the leading 0
def remove_leading_zero(address):
    """
    Strip the padding zero from a single-digit house number at the start of an address.

    Only a leading "0" followed by one digit 1-9 and a space is changed, so "01 Mason Road"
    becomes "1 Mason Road" while "10 Mason Road" is returned as-is.

    :param address: The first line of the address
    :return: The address without the padding zero; non-string values are returned unchanged
    """
    if not isinstance(address, str):
        # e.g. NaN/None for a missing address - re.sub would raise a TypeError on these
        return address
    return re.sub(r"^0([1-9]) ", r"\1 ", address)
# Strip the leading zero from every address1 value
mapped_priority_list["address1"] = mapped_priority_list["address1"].apply(remove_leading_zero)
mapped_priority_list["address1"] = np.where(
mapped_priority_list["Organisation Reference"] == 37004,
"8 Mason Road",
mapped_priority_list["address1"]
)
mapped_priority_list["address1"] = np.where(
mapped_priority_list["Organisation Reference"] == 37003,
"9 Mason Road",
mapped_priority_list["address1"]
)
mapped_priority_list = mapped_priority_list.rename(
columns={"UPRN": "uprn"}
)
mapped_priority_list["row_id"] = mapped_priority_list["Organisation Reference"]
# Let's get the newest EPC data for these properties
# We merge on UPRN, when we have it
# from etl.route_march_data_pull.app import get_data
# epc_data, errors, nodata = get_data(
# asset_list=mapped_priority_list,
# fulladdress_column="Address",
# address1_column="address1",
# postcode_column="Postcode",
# manual_uprn_map={},
# epc_api_only=True
# )
#
# epc_df = pd.DataFrame(epc_data)
# epc_df.to_csv(
# os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv"), index=False
# )
epc_df = pd.read_csv(os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv"))
epc_df = epc_df.rename(columns={"row_id": "Organisation Reference"})
# We now package up the data
# Sheet 1 is the base coordination data
output_coordination_sheet = coordinated_packages[
[
"Name", "Postcode", 'Organisation Reference', 'Package Ref',
'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
'Solar PV', 'Other measures', 'Survey: Current SAP Rating', 'Survey: Current EPC Band',
'Survey: Primary Energy Use (kWh/yr)',
'Survey: Primary Energy Use Intensity (kWh/m2/yr)',
'Survey: Number of Storeys', 'Survey: Fuel Bill',
'Survey: Window Age Description',
'Survey: Window Age Description Proportion (%)',
'Survey: Secondary Window Age Description',
'Survey: Secondary Window Age Description Proportion (%)',
'Survey: Number of Windows', 'Survey: Total Number of Doors',
'Survey: Number of Insulated Doors',
'Survey: Existing Primary Heating System',
'Survey: Existing Primary Heating PCDF Reference',
'Survey: Existing Primary Heating Controls',
'Survey: Existing Primary Heating % of Heat',
'Survey: Existing Secondary Heating System',
'Survey: Existing Secondary Heating PCDF Reference',
'Survey: Existing Secondary Heating Controls',
'Survey: Existing Secondary Heating % of Heat',
'Survey: Secondary Heating Code', 'Survey: Water Heating Code',
'Survey: Total Floor Area (m2)', 'Survey: Total Ground Floor Area (m2)',
'Survey: RIR Floor Area', 'Survey: Main Building Wall Area (m2)',
'Survey: First Extension Wall Area (m2)',
'Survey: Number of Light Fittings', 'Survey: Number of LEL Fittings',
'Survey: Number of fittings needing LEL', 'Survey: Main Roof Type',
'Survey: Main Roof Insulation',
'Survey: Main Roof Insulation Thickness', 'Survey: Main Wall Type',
'Survey: Main Wall Insulation', 'Survey: Main Wall Dry-lining',
'Survey: Main Wall Thickness',
'Survey: Main Building Alternative Wall Type',
'Survey: Main Building Alternative Wall Insulation',
'Survey: Main Building Alternative Wall Dry-lining',
'Survey: Main Building Alternative Wall Thickness',
'Survey: Main Fuel',
'Walls', 'Roofs', 'Heating', 'Main Fuel', 'Age', 'Property Type'
]
].rename(
columns={
'Walls': "Parity - Walls",
'Roofs': "Parity - Roof",
'Heating': "Parity - Heating",
'Main Fuel': "Parity - Fuel",
'Age': "Parity - Age Band",
'Property Type': "Parity - Property Type"
}
)
# Sheet 2 is the lookup table which maps the properties to their closest match
# We need to bring in the parity attributes between the mapped properties so we can see side-by-side
mapped_lookup = matches_df[
[
'Organisation Reference',
'Best Match Organisation Reference',
'Survey: Current EPC Band',
'Survey: Current SAP Rating'
]
].rename(
columns={
'Best Match Organisation Reference': "Best Match - Organisation Reference",
"Survey: Current EPC Band": "Best Match - Survey: Current EPC Band",
'Survey: Current SAP Rating': "Best Match - Survey: Current SAp Rating"
}
).merge(
features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]],
how="left",
on="Organisation Reference"
).merge(
features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]].rename(
columns={
"Organisation Reference": "Best Match - Organisation Reference",
"Walls": "Best Match - Walls",
"Roofs": "Best Match - Roof",
"Heating": "Best Match - Heating",
"Main Fuel": "Best Match - Main Fuel",
"Age": "Best Match - Age",
"Property Type": "Best Match - Property Type"
}
),
how="left",
on="Best Match - Organisation Reference"
).merge(
coordinated_packages[
[
"Organisation Reference", 'Survey: Main Wall Type', 'Survey: Main Wall Insulation',
'Survey: Main Roof Type', 'Survey: Main Roof Insulation', 'Survey: Main Roof Insulation Thickness',
'Survey: Existing Primary Heating System',
]
].rename(
columns={
"Organisation Reference": "Best Match - Organisation Reference",
'Survey: Main Wall Type': 'Best Match - Survey: Main Wall Type',
'Survey: Main Wall Insulation': 'Best Match - Survey: Main Wall Insulation',
'Survey: Main Roof Type': 'Best Match - Survey: Main Roof Type',
'Survey: Main Roof Insulation': 'Best Match - Survey: Main Roof Insulation',
'Survey: Main Roof Insulation Thickness': 'Best Match - Survey: Main Roof Insulation Thickness',
'Survey: Existing Primary Heating System': 'Best Match - Survey: Existing Primary Heating System',
}
),
how="left",
on="Best Match - Organisation Reference"
)
# Finally, we have the property, against the mapped home with the estimated SAP scores and the EPC data
worksheet = mapped_priority_list[
[
'Organisation Reference', 'Address', 'Postcode', 'Address ID', 'uprn', 'Archetype ID',
'SAP', 'SAP Band', "Property Type", "Walls", "Roofs", 'Glazing',
'Heating', 'Main Fuel', 'Hot Water', 'Estimated SAP Rating', 'Estimated EPC Rating'
]
].rename(
columns={
"SAP": "Parity - SAP Rating",
"SAP Band": "Parity - EPC Rating",
"Property Type": "Parity - Property Type",
"Walls": "Parity - Walls",
"Roofs": "Parity - Roofs",
'Glazing': "Parity - Glazing",
'Heating': 'Parity - Heating',
'Main Fuel': 'Parity - Main Fuel',
'Hot Water': 'Parity - Hot Water',
}
).merge(
epc_df[
[
"Organisation Reference",
"uprn",
"current-energy-efficiency",
"current-energy-rating",
"lodgement-date",
"construction-age-band",
"walls-description",
"roof-description",
"mainheat-description",
"windows-description",
"hotwater-description",
"main-fuel",
"total-floor-area",
]
].rename(
columns={
"uprn": "Last EPC - uprn",
"current-energy-efficiency": "Last EPC - SAP Score",
"current-energy-rating": "Last EPC - EPC Rating",
"lodgement-date": "Last EPC - Date Lodged",
"construction-age-band": "Last EPC - Age Band",
"walls-description": "Last EPC - Walls",
"roof-description": "Last EPC - Roof",
"mainheat-description": "Last EPC - Heating",
"windows-description": "Last EPC - Windows",
"hotwater-description": "Last EPC - Hot Water",
"main-fuel": "Last EPC - Main Fuel",
"total-floor-area": "Last EPC - Total Floor Area"
}
),
how="left",
on='Organisation Reference'
)
worksheet["Years Since Last EPC"]
# if __name__ == "__main__":
# main()

View file

@ -20,7 +20,7 @@ load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def get_data(asset_list, fulladdress_column, address1_column, postcode_column, manual_uprn_map):
def get_data(asset_list, fulladdress_column, address1_column, postcode_column, manual_uprn_map, epc_api_only=True):
epc_data = []
errors = []
no_epc = []
@ -33,6 +33,11 @@ def get_data(asset_list, fulladdress_column, address1_column, postcode_column, m
if house_no is None:
house_no = house_number
uprn = manual_uprn_map.get(full_address, None)
if uprn is None and home.get("uprn"):
uprn = home["uprn"]
if pd.isnull(uprn):
uprn = None
searcher = SearchEpc(
address1=str(house_no),
@ -88,6 +93,15 @@ def get_data(asset_list, fulladdress_column, address1_column, postcode_column, m
no_epc.append(home["row_id"])
continue
if epc_api_only:
epc = {
"row_id": home["row_id"],
**searcher.newest_epc.copy()
}
epc_data.append(epc)
continue
# Look for EPC recommendations
try:
property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])

View file

@ -1,6 +1,9 @@
import os
import PyPDF2
from string import Template
import pandas as pd
from survey_report.extraction.detect_report_type import detect_report_type
from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor
@ -34,44 +37,54 @@ def handle():
:return:
"""
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2"
folders = [
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1",
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2",
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3",
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 4",
"/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 5",
]
data = []
for data_folder in folders:
folder_contents = os.listdir(data_folder)
# We look for the following files:
# Site notes
file_mapping = {}
for file in folder_contents:
# Check if it's a pdf file
if not file.endswith(".pdf"):
continue
filepath = os.path.join(data_folder, file)
with (open(filepath, "rb") as f):
pdf = PyPDF2.PdfReader(f)
first_page = pdf.pages[0].extract_text()
text = ""
for page in pdf.pages:
text += page.extract_text()
folder_contents = os.listdir(data_folder)
# We look for the following files:
# Site notes
file_mapping = {}
for file in folder_contents:
# Check if it's a pdf file
if not file.endswith(".pdf"):
continue
filepath = os.path.join(data_folder, file)
with (open(filepath, "rb") as f):
pdf = PyPDF2.PdfReader(f)
first_page = pdf.pages[0].extract_text()
text = ""
for page in pdf.pages:
text += page.extract_text()
# Check the report type
report_type = detect_report_type(first_page)
if report_type is not None:
file_mapping[report_type] = text
# Check the report type
report_type = detect_report_type(first_page)
if report_type is not None:
file_mapping[report_type] = text
# This is only set up to work with quido site notes so we must have it
site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"])
site_notes = site_notes_extractor.extract_all()
# This is only set up to work with quido site notes so we must have it
site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"])
site_notes = site_notes_extractor.extract_all()
# We also must have an EPR
epr_extractor = EPRExtractor(file_mapping["quidos_epr"])
epr = epr_extractor.extract_all()
# We also must have an EPR
epr_extractor = EPRExtractor(file_mapping["quidos_epr"])
epr = epr_extractor.extract_all()
# We now produce the combined data sheet which is the starting figure:
data_sheet = {**epr, **site_notes}
del data_sheet['Building Dimensions']
# We unnest the Total Building Dimensions
data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
del data_sheet["Total Building Dimensions"]
# We now produce the combined data sheet which is the starting figure:
data_sheet = {**epr, **site_notes}
del data_sheet['Building Dimensions']
# We unnest the Total Building Dimensions
data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
del data_sheet["Total Building Dimensions"]
data.append(data_sheet)
data = pd.DataFrame(data)
# Generate the HTML report
# Placeholder locations