mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
226 lines
7.4 KiB
Python
226 lines
7.4 KiB
Python
import os
|
|
import time
|
|
|
|
import pandas as pd
|
|
from tqdm import tqdm
|
|
|
|
from dotenv import load_dotenv
|
|
from utils.s3 import read_excel_from_s3
|
|
from backend.SearchEpc import SearchEpc
|
|
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
|
|
|
from recommendations.recommendation_utils import (
|
|
estimate_perimeter,
|
|
estimate_external_wall_area,
|
|
estimate_number_of_floors
|
|
)
|
|
|
|
load_dotenv(dotenv_path="backend/.env")
|
|
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
|
|
|
|
|
def get_data(asset_list):
|
|
epc_data = []
|
|
errors = []
|
|
for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
|
|
try:
|
|
postcode = home["Postcode"]
|
|
house_number = home["AddressLine1"]
|
|
full_address = ", ".join([home["AddressLine1"], home["AddressLine4"], home["AddressLine5"]])
|
|
|
|
searcher = SearchEpc(
|
|
address1=str(house_number),
|
|
postcode=postcode,
|
|
auth_token=EPC_AUTH_TOKEN,
|
|
os_api_key="",
|
|
property_type=None,
|
|
fast=True,
|
|
full_address=full_address,
|
|
max_retries=5
|
|
)
|
|
# Force the skipping of estimating the EPC
|
|
searcher.ordnance_survey_client.property_type = None
|
|
searcher.ordnance_survey_client.built_form = None
|
|
|
|
searcher.find_property(skip_os=True)
|
|
if searcher.newest_epc is None:
|
|
continue
|
|
|
|
# Look for EPC recommendatons
|
|
try:
|
|
property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
|
|
except:
|
|
property_recommendations = {"rows": []}
|
|
|
|
epc = {
|
|
"row_id": home["row_id"],
|
|
**searcher.newest_epc.copy(),
|
|
"recommendations": property_recommendations["rows"]
|
|
}
|
|
|
|
epc_data.append(epc)
|
|
except Exception as e:
|
|
errors.append(home["row_id"])
|
|
time.sleep(5)
|
|
|
|
return epc_data, errors
|
|
|
|
|
|
def app():
|
|
"""
|
|
This app is EPC pulling data for some properties owned by Livewest
|
|
|
|
Data request contents:
|
|
Date of last EPC
|
|
Reason for EPC
|
|
SAP score on register
|
|
Property Type
|
|
Property Area
|
|
Property Age
|
|
Any Dimensions (HLP,PW,RH)
|
|
Property Wall Construction
|
|
Heating Type
|
|
Secondary Heating
|
|
Loft Insulation Depth
|
|
|
|
Additional if possible:
|
|
Heat loss calculations
|
|
EPC recommendations
|
|
Property UPRN
|
|
|
|
"""
|
|
asset_list = pd.read_excel(
|
|
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/SETTLE FULL PROPOSED PROGRAMME.xlsx",
|
|
header=0
|
|
)
|
|
asset_list["row_id"] = asset_list.index
|
|
|
|
epc_data, errors = get_data(asset_list)
|
|
|
|
# We now retrieve any failed properties
|
|
asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
|
|
epc_data_failed, _ = get_data(asset_list_failed)
|
|
|
|
# Append the failed data to the main data
|
|
epc_data.extend(epc_data_failed)
|
|
|
|
epc_df = pd.DataFrame(epc_data)
|
|
|
|
# We expand out the recommendations
|
|
recommendations_df = epc_df[["row_id", "recommendations"]]
|
|
|
|
unique_recommendations = set()
|
|
for _, row in recommendations_df.iterrows():
|
|
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
|
|
|
|
columns = ["row_id"] + list(unique_recommendations)
|
|
transformed_data = []
|
|
for _, row in recommendations_df.iterrows():
|
|
# Initialize a dictionary for this row with False for all recommendations
|
|
row_data = {col: False for col in columns}
|
|
row_data["row_id"] = row["row_id"]
|
|
|
|
# Set True for each recommendation present in this row
|
|
for rec in row["recommendations"]:
|
|
recommendation_text = rec["improvement-summary-text"]
|
|
row_data[recommendation_text] = True
|
|
|
|
# Append the row data to transformed_data
|
|
transformed_data.append(row_data)
|
|
|
|
transformed_df = pd.DataFrame(transformed_data)
|
|
# Drop the column that is ""
|
|
transformed_df = transformed_df.drop(columns=[""])
|
|
|
|
# Retrieve just the data we need
|
|
epc_df = epc_df[
|
|
[
|
|
"row_id",
|
|
"uprn",
|
|
"property-type",
|
|
"built-form",
|
|
"inspection-date",
|
|
"current-energy-rating",
|
|
"current-energy-efficiency",
|
|
"roof-description",
|
|
"walls-description",
|
|
"transaction-type",
|
|
# New fields needed
|
|
"secondheat-description",
|
|
"total-floor-area",
|
|
"construction-age-band",
|
|
"floor-height",
|
|
"number-habitable-rooms",
|
|
"mainheat-description",
|
|
#
|
|
"energy-consumption-current", # kwh/m2
|
|
]
|
|
]
|
|
|
|
asset_list = asset_list.merge(
|
|
epc_df,
|
|
how="left",
|
|
on="row_id"
|
|
).merge(
|
|
transformed_df,
|
|
how="left",
|
|
on="row_id"
|
|
)
|
|
|
|
asset_list = asset_list.drop(columns=["row_id"])
|
|
|
|
# Rename the columns
|
|
asset_list = asset_list.rename(columns={
|
|
"inspection-date": "Date of last EPC",
|
|
"current-energy-efficiency": "SAP score on register",
|
|
"current-energy-rating": "EPC rating on register",
|
|
"property-type": "Property Type",
|
|
"built-form": "Archetype",
|
|
"total-floor-area": "Property Floor Area",
|
|
"construction-age-band": "Property Age Band",
|
|
"floor-height": "Property Floor Height",
|
|
"number-habitable-rooms": "Number of Habitable Rooms",
|
|
"walls-description": "Wall Construction",
|
|
"roof-description": "Roof Construction",
|
|
"mainheat-description": "Heating Type",
|
|
"secondheat-description": "Secondary Heating",
|
|
"transaction-type": "Reason for last EPC",
|
|
"energy-consumption-current": "Heat Demand (kWh/m2)"
|
|
})
|
|
|
|
asset_list["Estimated Number of Floors"] = asset_list.apply(
|
|
lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
|
|
x["Property Type"]) else None, axis=1
|
|
)
|
|
|
|
asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
|
|
# Replace "" value with None
|
|
asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", None)
|
|
asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)
|
|
|
|
asset_list["Estimated Perimeter (m)"] = asset_list.apply(
|
|
lambda x: estimate_perimeter(
|
|
floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
|
|
num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
|
|
), axis=1
|
|
)
|
|
|
|
asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
|
|
lambda x: estimate_external_wall_area(
|
|
num_floors=x["Estimated Number of Floors"],
|
|
floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
|
|
perimeter=x["Estimated Perimeter (m)"],
|
|
built_form=x["Archetype"]
|
|
),
|
|
axis=1
|
|
)
|
|
|
|
asset_list["Roof Insulation Thickness"] = asset_list.apply(
|
|
lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
|
|
x["Roof Construction"]) else None,
|
|
axis=1
|
|
)
|
|
|
|
# Store as an excel
|
|
filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/Settle EPC Data pull - 08 Nov 2024.xlsx"
|
|
asset_list.to_excel(filename, index=False)
|