# Mirror of https://github.com/Hestia-Homes/Model.git (synced 2026-06-08 11:17:27 +00:00; 335 lines, 12 KiB, Python)
import os
|
||
import time
|
||
|
||
import pandas as pd
|
||
import numpy as np
|
||
from tqdm import tqdm
|
||
|
||
from dotenv import load_dotenv
|
||
from backend.SearchEpc import SearchEpc
|
||
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
|
||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||
|
||
from recommendations.recommendation_utils import (
|
||
estimate_perimeter,
|
||
estimate_external_wall_area,
|
||
estimate_number_of_floors
|
||
)
|
||
|
||
# Read the backend .env file so that the EPC credentials are available in the environment.
load_dotenv(dotenv_path="backend/.env")
# os.getenv returns None when EPC_AUTH_TOKEN is not set.
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||
|
||
|
||
def get_data(asset_list, fulladdress_column, address1_column, postcode_column):
    """
    Look up the newest EPC, its recommendations and the FindMyEPC data for each row of an asset list.

    Each property is searched exactly once. Any exception raised while processing a row is caught, the
    row's "row_id" is recorded in the returned error list, and processing moves on to the next row after
    a 5 second pause. Rows for which no EPC is found are skipped silently (they appear in neither list).

    :param asset_list: DataFrame with one property per row. Must contain a "row_id" column in addition
        to the three columns named by the other arguments.
    :param fulladdress_column: name of the column holding the full address.
    :param address1_column: name of the column holding the first address line (converted with str()).
    :param postcode_column: name of the column holding the postcode.
    :return: tuple ``(epc_data, errors)``. ``epc_data`` is a list of dicts, one per property with an EPC,
        containing "row_id", every field of the newest EPC, "recommendations" and "find_my_epc_data".
        ``errors`` is the list of "row_id" values whose lookup raised an exception.
    """
    epc_data = []
    errors = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            postcode = home[postcode_column]
            house_number = home[address1_column]
            full_address = home[fulladdress_column]

            searcher = SearchEpc(
                address1=str(house_number),
                postcode=postcode,
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=full_address,
                max_retries=5,
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None

            searcher.find_property(skip_os=True)
            if searcher.newest_epc is None:
                continue

            # Look for EPC recommendations; a failed lookup is treated as "no recommendations"
            # rather than failing the whole property.
            try:
                property_recommendations = searcher.client.domestic.recommendations(
                    searcher.newest_epc["lmk-key"]
                )
            except Exception:
                property_recommendations = {"rows": []}

            # Retrieve data from FindMyEPC
            find_epc_searcher = RetrieveFindMyEpc(
                address=searcher.newest_epc["address"], postcode=searcher.newest_epc["postcode"]
            )
            find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
            # Random pause of 0.1-1s between properties (presumably to go easy on the remote services).
            time.sleep(np.random.uniform(0.1, 1))

            epc_data.append(
                {
                    "row_id": home["row_id"],
                    **searcher.newest_epc.copy(),
                    "recommendations": property_recommendations["rows"],
                    "find_my_epc_data": find_epc_data,
                }
            )
        except Exception:
            # Remember the row so the caller can retry it, then back off before the next property.
            errors.append(home["row_id"])
            time.sleep(5)

    return epc_data, errors
|
||
|
||
|
||
def extract_address1(asset_list, full_address_col, method="first_two_words"):
    """
    Derive a first-address-line column from the full address column.

    The result is written to a new "address1_extracted" column on ``asset_list`` itself (the frame is
    modified in place) and the same frame is returned.

    :param asset_list: DataFrame containing the full address column.
    :param full_address_col: name of the column holding the full address strings.
    :param method: extraction strategy. Only "first_two_words" is supported: the first two
        whitespace-separated words of the address, joined by a single space.
    :return: ``asset_list`` with the "address1_extracted" column added.
    :raises ValueError: if ``method`` is not a recognised strategy.
    """
    if method != "first_two_words":
        raise ValueError(f"Method {method} not recognized")

    # Split on any run of whitespace (no explicit separator) so that repeated or leading spaces do not
    # produce empty "words", which would otherwise end up in the extracted value.
    words = asset_list[full_address_col].str.split()
    asset_list["address1_extracted"] = words.str[:2].str.join(" ")
    return asset_list
|
||
|
||
|
||
def app():
    """
    Pull EPC data for some properties owned by Livewest and write an enriched copy of the asset list.

    The asset list is read from DATA_FOLDER/DATA_FILENAME, every property is looked up through
    get_data (rows that errored are retried once), and the enriched list is written next to the
    input file as "<input name> EPC Data Pull.xlsx".

    Data request contents:
    Date of last EPC
    Reason for EPC
    SAP score on register
    Property Type
    Property Area
    Property Age
    Any Dimensions (HLP,PW,RH)
    Property Wall Construction
    Heating Type
    Secondary Heating
    Loft Insulation Depth

    Additional if possible:
    Heat loss calculations
    EPC recommendations
    Property UPRN

    """
    DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/P&F/"
    DATA_FILENAME = "BELOW C - WFT FINDINGS ON INSPECTION PLUS SUGGESTED ACTION.xlsx"
    POSTCODE_COLUMN = "Postcode"
    FULLADDRESS_COLUMN = "Address"
    ADDRESS1_COLUMN = None
    ADDRESS1_METHOD = "first_two_words"

    asset_list = pd.read_excel(os.path.join(DATA_FOLDER, DATA_FILENAME), header=0)
    # row_id is the key used for every merge below; it is dropped again before saving.
    asset_list["row_id"] = asset_list.index

    # We clean up potential non-breaking spaces, and runs of repeated spaces
    for col in [c for c in [POSTCODE_COLUMN, FULLADDRESS_COLUMN, ADDRESS1_COLUMN] if c is not None]:
        asset_list[col] = asset_list[col].str.replace('\xa0', ' ', regex=False)
        asset_list[col] = asset_list[col].str.replace(r" {2,}", " ", regex=True)

    if ADDRESS1_COLUMN is None:
        ADDRESS1_COLUMN = "address1_extracted"
        asset_list = extract_address1(asset_list, FULLADDRESS_COLUMN, ADDRESS1_METHOD)

    epc_data, errors = get_data(
        asset_list=asset_list,
        fulladdress_column=FULLADDRESS_COLUMN,
        address1_column=ADDRESS1_COLUMN,
        postcode_column=POSTCODE_COLUMN,
    )

    # We now retry any failed properties once; rows failing a second time are left without EPC data
    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
    epc_data_failed, _ = get_data(
        asset_list=asset_list_failed,
        fulladdress_column=FULLADDRESS_COLUMN,
        address1_column=ADDRESS1_COLUMN,
        postcode_column=POSTCODE_COLUMN,
    )

    # Append the retried data to the main data
    epc_data.extend(epc_data_failed)

    epc_df = pd.DataFrame(epc_data)

    # We expand out the recommendations into one boolean column per distinct recommendation text
    recommendations_df = epc_df[["row_id", "recommendations"]]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the output column order is
    # the same on every run (a set would not guarantee that).
    unique_recommendations = dict.fromkeys(
        rec["improvement-summary-text"]
        for recs in recommendations_df["recommendations"]
        for rec in recs
    )

    columns = ["row_id"] + list(unique_recommendations)
    transformed_data = []
    for _, row in recommendations_df.iterrows():
        # Start with False for all recommendations, then flag the ones present on this row
        row_data = dict.fromkeys(columns, False)
        row_data["row_id"] = row["row_id"]
        for rec in row["recommendations"]:
            row_data[rec["improvement-summary-text"]] = True
        transformed_data.append(row_data)

    transformed_df = pd.DataFrame(transformed_data)
    # Drop the column that is "" (only present when some recommendation had an empty summary text)
    transformed_df = transformed_df.drop(columns=[""], errors="ignore")

    # Get the find my epc data, flattened into columns alongside row_id
    find_my_epc_data = epc_df[["row_id"]].join(
        pd.json_normalize(epc_df["find_my_epc_data"])
    )
    # We check if we get the solar pv column:
    if "Solar photovoltaics" not in find_my_epc_data.columns:
        find_my_epc_data["Solar photovoltaics"] = False

    # Retrieve just the data we need
    epc_df = epc_df[
        [
            "row_id",
            "uprn",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            # New fields needed
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            #
            "energy-consumption-current",  # kwh/m2
            "photo-supply",
        ]
    ]

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        on="row_id"
    ).merge(
        find_my_epc_data[
            [
                "row_id", "heating_text", "hot_water_text", 'Assessor’s name',
                "Assessor's Telephone", "Assessor's Email", "Accreditation scheme",
                "Assessor’s ID", "Solar photovoltaics"
            ]
        ].rename(
            columns={
                "Solar photovoltaics": "Has Solar PV",
                "heating_text": "Heating Estimated kWh",
                "hot_water_text": "Hot Water Estimated kWh",
            }
        ),
        how="left",
        on="row_id"
    )

    # A property counts as having solar PV if FindMyEPC says so or the EPC photo-supply is non-zero
    asset_list["Has Solar PV"] = asset_list["Has Solar PV"] | ~asset_list["photo-supply"].isin(["0.0", 0, None, ""])
    asset_list = asset_list.drop(columns=["photo-supply"])

    # Rename the columns
    asset_list = asset_list.rename(columns={
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)",
    })

    asset_list["Estimated Number of Floors"] = asset_list.apply(
        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
            x["Property Type"]) else None, axis=1
    )

    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
    # Replace "" value with None
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", None)
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)

    # Floor area and room count are divided by the number of floors to get per-floor figures
    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
        lambda x: estimate_perimeter(
            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
        ), axis=1
    )

    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
        lambda x: estimate_external_wall_area(
            num_floors=x["Estimated Number of Floors"],
            # Fall back to 2.5 when the EPC carries no floor height
            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
            perimeter=x["Estimated Perimeter (m)"],
            built_form=x["Archetype"]
        ),
        axis=1
    )

    asset_list["Roof Insulation Thickness"] = asset_list.apply(
        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
            x["Roof Construction"]) else None,
        axis=1
    )

    # For all of the columns in transformed_df (except row_id), prefix with "Recommendation: "
    transformed_df = transformed_df.rename(
        columns={col: f"Recommendation: {col}" for col in transformed_df.columns if col != "row_id"}
    )

    asset_list = asset_list.merge(
        transformed_df,
        how="left",
        on="row_id"
    )
    asset_list = asset_list.drop(columns=["row_id"])

    # Store as an excel file next to the input, named after it
    filename = os.path.join(DATA_FOLDER, os.path.splitext(DATA_FILENAME)[0]) + " EPC Data Pull.xlsx"
    asset_list.to_excel(filename, index=False)
|