Merge pull request #372 from Hestia-Homes/settle-epc-data

Settle epc data
This commit is contained in:
KhalimCK 2024-11-12 15:50:06 +00:00 committed by GitHub
commit 579d403301
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 339 additions and 18 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -0,0 +1,226 @@
import os
import time
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
estimate_perimeter,
estimate_external_wall_area,
estimate_number_of_floors
)
# Load variables from the backend .env file into the process environment before they are read below
load_dotenv(dotenv_path="backend/.env")
# Token passed to SearchEpc as auth_token in get_data; this is None when the variable is not set
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def get_data(asset_list):
    """
    Look up the newest EPC, plus any EPC recommendations, for every property in the asset list.

    :param asset_list: DataFrame with one row per property. The columns "Postcode", "AddressLine1",
        "AddressLine4", "AddressLine5" and "row_id" are read from each row.
    :return: Tuple of (epc_data, errors). epc_data is a list of dicts, one per property for which an EPC was
        found, containing the row_id, the fields of the newest EPC and a "recommendations" list. errors is the
        list of row_ids whose lookup raised, so that the caller can retry them. Properties with no EPC appear
        in neither list.
    """
    epc_data = []
    errors = []

    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            postcode = home["Postcode"]
            house_number = home["AddressLine1"]
            full_address = ", ".join([home["AddressLine1"], home["AddressLine4"], home["AddressLine5"]])

            searcher = SearchEpc(
                address1=str(house_number),
                postcode=postcode,
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=full_address,
                max_retries=5
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None
            searcher.find_property(skip_os=True)

            if searcher.newest_epc is None:
                continue

            # Look for EPC recommendations. A failed lookup is treated as "no recommendations" rather than
            # losing the EPC itself. Only Exception is caught so that Ctrl-C / SystemExit still stop the run.
            try:
                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
            except Exception:
                property_recommendations = {"rows": []}

            epc = {
                "row_id": home["row_id"],
                **searcher.newest_epc.copy(),
                "recommendations": property_recommendations["rows"]
            }
            epc_data.append(epc)
        except Exception:
            # Record the failure so the caller can retry this property later
            errors.append(home["row_id"])
            # NOTE(review): the pause is assumed to be a back-off applied only after a failed lookup - confirm
            time.sleep(5)

    return epc_data, errors
def _expand_recommendations(epc_df):
    """
    One-hot encode the EPC recommendations.

    :param epc_df: DataFrame with a "row_id" column and a "recommendations" column holding, per row, a list of
        dicts that each have an "improvement-summary-text" key.
    :return: DataFrame with one row per input row, a "row_id" column and one boolean column per distinct
        recommendation text. The column for the empty recommendation text "" is removed when present.
    """
    recommendations_df = epc_df[["row_id", "recommendations"]]

    unique_recommendations = {
        rec["improvement-summary-text"]
        for recommendations in recommendations_df["recommendations"]
        for rec in recommendations
    }
    # Sorted so the output columns come out in the same order on every run (set iteration order is not stable)
    columns = ["row_id"] + sorted(unique_recommendations, key=str)

    transformed_data = []
    for _, row in recommendations_df.iterrows():
        # Start with False for every recommendation, then flag the ones present on this property
        row_data = dict.fromkeys(columns, False)
        row_data["row_id"] = row["row_id"]
        for rec in row["recommendations"]:
            row_data[rec["improvement-summary-text"]] = True
        transformed_data.append(row_data)

    transformed_df = pd.DataFrame(transformed_data)
    # Drop the column that is "". Not every data pull contains an empty recommendation text, so a missing
    # column must not raise.
    return transformed_df.drop(columns=[""], errors="ignore")


def _floor_height_or_default(value, default=2.5):
    """
    Return the floor height as a float, falling back to the default when it is missing.

    Missing covers None, NaN (rows with no matched EPC after the left merge) and falsy values such as "".
    """
    if pd.isnull(value) or not value:
        return default
    return float(value)


def app(asset_list_path=None, output_path=None):
    """
    Pull EPC data for every property in a customer asset list and write the enriched list to Excel.

    Data request contents:
    Date of last EPC
    Reason for EPC
    SAP score on register
    Property Type
    Property Area
    Property Age
    Any Dimensions (HLP,PW,RH)
    Property Wall Construction
    Heating Type
    Secondary Heating
    Loft Insulation Depth
    Additional if possible:
    Heat loss calculations
    EPC recommendations
    Property UPRN

    :param asset_list_path: Excel workbook holding the asset list. Defaults to the Settle proposed programme.
    :param output_path: Where the enriched workbook is written. Defaults to the Settle EPC data pull file.
    :return: None. The result is written to output_path.
    """
    if asset_list_path is None:
        asset_list_path = (
            "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/SETTLE FULL PROPOSED PROGRAMME.xlsx"
        )
    if output_path is None:
        output_path = (
            "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/Settle EPC Data pull - 08 Nov 2024.xlsx"
        )

    asset_list = pd.read_excel(asset_list_path, header=0)
    # row_id ties every EPC result back to the asset list row it came from
    asset_list["row_id"] = asset_list.index

    epc_data, errors = get_data(asset_list)

    # We now retry any failed properties once
    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
    epc_data_failed, still_failed = get_data(asset_list_failed)
    # Append the recovered data to the main data
    epc_data.extend(epc_data_failed)
    if still_failed:
        # These rows end up with empty EPC columns in the output, so make that visible
        print(f"{len(still_failed)} properties failed on both attempts and have no EPC data")

    epc_df = pd.DataFrame(epc_data)

    # We expand out the recommendations into one boolean column per recommendation
    transformed_df = _expand_recommendations(epc_df)

    # Retrieve just the data we need
    epc_df = epc_df[
        [
            "row_id",
            "uprn",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            # New fields needed
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            #
            "energy-consumption-current",  # kwh/m2
        ]
    ]

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        on="row_id"
    ).merge(
        transformed_df,
        how="left",
        on="row_id"
    )
    asset_list = asset_list.drop(columns=["row_id"])

    # Rename the columns
    asset_list = asset_list.rename(columns={
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)"
    })

    asset_list["Estimated Number of Floors"] = asset_list.apply(
        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
            x["Property Type"]) else None, axis=1
    )

    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
    # Replace "" value with None. The dict form is used because a scalar replace with value=None is treated as
    # "no value given" (forward fill) by older pandas versions.
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace({"": None})
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)

    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
        lambda x: estimate_perimeter(
            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
        ), axis=1
    )

    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
        lambda x: estimate_external_wall_area(
            num_floors=x["Estimated Number of Floors"],
            # NaN is truthy, so a plain truthiness test would pass NaN through instead of the 2.5 fallback
            floor_height=_floor_height_or_default(x["Property Floor Height"]),
            perimeter=x["Estimated Perimeter (m)"],
            built_form=x["Archetype"]
        ),
        axis=1
    )

    asset_list["Roof Insulation Thickness"] = asset_list.apply(
        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
            x["Roof Construction"]) else None,
        axis=1
    )

    # Store as an excel
    asset_list.to_excel(output_path, index=False)

View file

@ -8,7 +8,7 @@ from collections import Counter
CUSTOMER_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
SURVEY_FOLDERS = os.path.join(CUSTOMER_FOLDER_PATH, "StonewaterSurveys_{i}")
NUM_FOLDERS = 14
NUM_FOLDERS = 15
def sap_to_epc(sap_points: int | float):
@ -871,7 +871,10 @@ def main():
# We now merge on the coordinator data so that against each property, we can map the measures
retrofit_packages_board = pd.read_excel(
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater 3.0 Updated SAP Pre & Modelled 29.10.24.xlsx"),
os.path.join(
CUSTOMER_FOLDER_PATH,
"Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1731315080 11.11.24.xlsx"
),
header=4
)
retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])]
@ -902,13 +905,25 @@ def main():
# '102 Cheaton Close': '',
# 'Flat 16 Spring Gardens': '',
# '4 Apple Close': '',
'25 Folly Lane': '',
# '25 Folly Lane': '',
'2 Calshot Walk': 'StonewaterSurveys_3/156-3-2 Calshot Walk-MK41 8QS',
'21 Constitution Hill': 'StonewaterSurveys_1/112-11-21 Constitution Hill-BH14 0PX',
'22 Constitution Hill': 'StonewaterSurveys_4/185-8-22 Constitution Hill-BH14 0PX',
'2 Marches Cottages, School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA',
'26, Copthorn House, Brighton Road': 'StonewaterSurveys_15/133-1-26 Brighton Road-KT20 6BQ',
'4, Old St Marys, Ripley Lane': "StonewaterSurveys_15/433-3-4 Ripley Lane-KT24 6JG",
'1 Nelson House, Short Street': 'StonewaterSurveys_15/89-2-1 Short Street-GU11 1HX',
"18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX',
'3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX',
'16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ',
'20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX',
'7 Croft Street': 'StonewaterSurveys_8/333-2-7 Croft Street-HR6 8LA'
}
# We now match this retrofit packages board to the extracted data
matching_lookup = []
for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
# Handle the case that has the wrong postcode in the asset data
if home["Name"] in manual_filters:
filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy()
@ -972,7 +987,11 @@ def main():
missing_ids = list(missing_ids)
if missing_ids:
# We check that the missing ids have no data yet
if len(missing_ids) != 8:
# missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)]
# missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
# CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
if len(missing_ids) != 6:
raise Exception("Unacceptable number of missings")
if matching_lookup["Address ID"].duplicated().sum():
@ -1065,12 +1084,20 @@ def main():
stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"])
windows_data["Address ID"] = windows_data["Address ID"].astype(float)
stonewater_data = stonewater_data.merge(windows_data, on="Address ID", how="left")
stonewater_data = stonewater_data.sort_values("Archetype ID", ascending=True)
if stonewater_data["Address ID"].duplicated().sum():
raise Exception("Duplicate Address IDs")
for c in [
'Window attributes - Fitted/renewed date',
'Parent Asset Window attributes - Fitted/renewed date',
'Fitted/renewed date'
]:
stonewater_data[c] = stonewater_data[c].astype(str)
# Save this data to excel
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages.xlsx", index=False)
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V2.xlsx", index=False)
cost_sheet = [
{
@ -1155,7 +1182,7 @@ def main():
create_proposed_wave_3_bid(
costed_packages_filepath=os.path.join(
CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) MR Review v1.xlsx"
CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) Single Model V3.xlsx"
),
archetypes_sheet_filepath=os.path.join(
CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
@ -1165,8 +1192,8 @@ def main():
def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepath):
# We read in the costed packages
# Note: Header as 12 is for Matt Ratcliff's reviewed version
costed_packages = pd.read_excel(costed_packages_filepath, header=13, sheet_name="Modelled Packages")
costed_packages = costed_packages[~pd.isnull(costed_packages["Address"])]
archetypes_to_cost = costed_packages[
[
@ -1195,16 +1222,11 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
'Existing Primary Heating System',
'Existing Primary Heating PCDF Reference'])
# We take properties that are EPC D and below (61% of units)
# We take properties that are EPC D and below (59% of units)
archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])]
archetypes_to_cost["Has been modelled"] = ~pd.isnull(archetypes_to_cost["Modelled SAP Band"])
average_cost = archetypes_to_cost[
archetypes_to_cost["Has been modelled"]
]['Total Cost of Measures inc Contingency'].mean()
print(average_cost)
# These are the Archetypes that will likely be suitable for Wave 3
archetypes_sheet = pd.read_excel(archetypes_sheet_filepath, header=4)
archetypes_sheet = archetypes_sheet[~pd.isnull(archetypes_sheet["Address ID"])]
@ -1218,7 +1240,21 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
how="left"
)
proposed_sample = archetypes_sheet[archetypes_sheet["Archetype ID"].isin(archetypes_to_cost["Archetype ID"])]
proposed_sample = archetypes_sheet[
archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
]
not_proposed = archetypes_sheet[
~archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
]
# archetypes_without_survey = []
# for p in list(set(not_proposed)):
# filtered = costed_packages[costed_packages["Archetype ID"].astype(int).astype(str) == p]
# if filtered.empty:
# archetypes_without_survey.append(p)
# Can we propose anything about archetypes that were not surveyed?
proposed_sample = proposed_sample[
[
@ -1229,6 +1265,8 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
# We classify into high and low confidence
archetypes_to_cost["Surveyed Main Roof"] = archetypes_to_cost["Surveyed Main Roof"].fillna("")
match_classification = []
for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)):
@ -1313,8 +1351,65 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
None, proposed_sample["Total Cost of Measures inc Contingency"]
)
proposed_sample = proposed_sample.sort_values("Archetype ID", ascending=True)
# Save excel
proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid (WIP).xlsx", index=False)
proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid V2 (WIP).xlsx", index=False)
# For each postcode that's in the bid, we also summarise the number of units in the bid and number left out
proposed_sample_postcodes = proposed_sample["Postcode"].unique()
postcode_summary = []
for postcode in proposed_sample_postcodes:
in_proposal = proposed_sample[proposed_sample["Postcode"] == postcode]
not_in_proposal = not_proposed[not_proposed["Postcode"] == postcode]
postcode_summary.append(
{
"Postcode": postcode,
"Number of properties in Proposal": len(in_proposal),
"Number of properties not in Proposal": len(not_in_proposal)
}
)
postcode_summary = pd.DataFrame(postcode_summary)
postcode_summary = postcode_summary.sort_values(
"Number of properties not in Proposal",
ascending=False).reset_index(drop=True)
postcode_summary.to_excel(
CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid Postcode Summary.xlsx", index=False
)
def find_remaining_surveys():
    """
    This compares a list of properties that have been surveyed against a list of properties that I have produced
    costed retrofit packages for, so I know what needs to be downloaded from Sharepoint

    Writes needed_surveys.csv (columns id, Name, Postcode) into the customer folder, where id is
    "<Archetype ID>-<Arch. Group Rank>" and rows are sorted by id.

    :return: None
    :raises AssertionError: if the needed and costed row counts do not add up to the surveyed row count
    """
    surveyed = pd.read_excel(
        os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater_SHDF_3_0_Board_work_in_progress_- 07.11.24.xlsx"),
        header=4
    )

    costed = pd.read_excel(
        os.path.join(
            CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) MR Review v1.xlsx"
        ),
        header=13,
        sheet_name="Modelled Packages"
    )
    # Rows without an Address ID are not properties, so leave them out of the comparison
    costed = costed[~pd.isnull(costed["Address ID"])]

    # Copy so that adding the id column below writes to an independent frame, not a slice of `surveyed`
    needed = surveyed[~surveyed["Address ID"].isin(costed["Address ID"])].copy()
    needed["id"] = needed["Archetype ID"].astype(str) + "-" + needed["Arch. Group Rank"].astype(str)
    needed = needed.sort_values("id", ascending=True)

    needed[["id", "Name", "Postcode"]].to_csv(
        os.path.join(CUSTOMER_FOLDER_PATH, "needed_surveys.csv")
    )

    # Sanity check: every surveyed property is either already costed or still needed
    assert needed.shape[0] + costed.shape[0] == surveyed.shape[0], (
        f"needed ({needed.shape[0]}) + costed ({costed.shape[0]}) != surveyed ({surveyed.shape[0]})"
    )
# if __name__ == "__main__":
# main()