# Model/etl/customers/unitas/20_may_2024_data_pull.py
# 2024-06-01 14:32:20 +01:00
#
# 148 lines
# 4.9 KiB
# Python

import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
estimate_perimeter,
estimate_external_wall_area,
estimate_number_of_floors
)
# Read the backend's .env file into the process environment; the path is
# relative, so it is resolved against the current working directory.
load_dotenv(dotenv_path="backend/.env")

# Token handed to SearchEpc as `auth_token`; None when the variable is unset.
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")
# Default workbook locations, kept from the original one-off run.
_DEFAULT_INPUT_PATH = "/Users/khalimconn-kowlessar/Downloads/UNITAS BUNGALOWS - EPC DATA PULL.xlsx"
# NOTE: "30tg" (sic) is preserved so the default output filename is unchanged.
_DEFAULT_OUTPUT_PATH = "UNITAS BUNGALOWS - EPC DATA PULL - May 30tg 2024.xlsx"

# Asset-list columns identifying a property, and the columns carrying the same
# values on each EPC record so the two frames can be joined back together.
_ASSET_KEYS = ["Address Line 1", "Post Code"]
_EPC_KEYS = ["asset_list_address", "asset_list_postcode"]

# EPC fields carried into the output, in output column order.
_EPC_FIELDS = [
    "uprn",
    "property-type",
    "built-form",
    "inspection-date",
    "current-energy-rating",
    "current-energy-efficiency",
    "roof-description",
    "walls-description",
    "transaction-type",
    # New fields needed
    "secondheat-description",
    "total-floor-area",
    "construction-age-band",
    "floor-height",
    "number-habitable-rooms",
    "mainheat-description",
]

# EPC field name -> column heading used in the output workbook.
_EPC_COLUMN_NAMES = {
    "inspection-date": "Date of last EPC",
    "current-energy-efficiency": "SAP score on register",
    "current-energy-rating": "EPC rating on register",
    "property-type": "EPC Property Type",
    "built-form": "Archetype",
    "total-floor-area": "Property Floor Area",
    "construction-age-band": "Property Age Band",
    "floor-height": "Property Floor Height",
    "number-habitable-rooms": "Number of Habitable Rooms",
    "walls-description": "Wall Construction",
    "roof-description": "Roof Construction",
    "mainheat-description": "Heating Type",
    "secondheat-description": "Secondary Heating",
    "transaction-type": "Reason for last EPC",
}

# Fallback used when the EPC carries no floor height (presumably metres - TODO confirm).
_DEFAULT_FLOOR_HEIGHT = 2.5


def app(input_path: str = _DEFAULT_INPUT_PATH, output_path: str = _DEFAULT_OUTPUT_PATH) -> None:
    """
    Pull EPC data for properties owned by Unitas and write the enriched asset list to Excel.

    The asset list is read from ``input_path``, the newest EPC is looked up for every row,
    the selected EPC fields are joined back onto the asset list (matching on address AND
    post code), a few derived estimates are added, and the result is written to
    ``output_path``. Rows for which no EPC was found keep empty EPC/estimate columns.

    :param input_path: Excel workbook holding the asset list; it must contain the columns
        "Address Line 1", "Post Code" and "Property Reference".
    :param output_path: Path of the Excel workbook to write.
    :return: None
    """
    asset_list = pd.read_excel(input_path, header=0)
    epc_df = _fetch_epc_records(asset_list)

    # Join on address *and* post code: the first address line alone is not unique
    # across post codes and would attach the wrong EPC to same-named addresses.
    asset_list = asset_list.merge(
        epc_df,
        how="left",
        left_on=_ASSET_KEYS,
        right_on=_EPC_KEYS,
    )
    asset_list = asset_list.drop(columns=_EPC_KEYS)
    asset_list = asset_list.rename(columns=_EPC_COLUMN_NAMES)

    asset_list["Estimated Number of Floors"] = asset_list.apply(_row_number_of_floors, axis=1)

    # EPC values may be empty strings; coerce those to NaN rather than crashing.
    for column in ("Property Floor Area", "Number of Habitable Rooms"):
        asset_list[column] = pd.to_numeric(asset_list[column], errors="coerce").astype(float)

    asset_list["Estimated Perimeter (m)"] = asset_list.apply(_row_perimeter, axis=1)
    # NOTE(review): this column is filled from estimate_external_wall_area, so the
    # "Perimeter (m)" heading may be misleading; the heading is kept unchanged so the
    # output format stays the same.
    asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(_row_external_wall_area, axis=1)
    asset_list["Roof Insulation Thickness"] = asset_list.apply(_row_roof_insulation_thickness, axis=1)

    # Store as an excel
    asset_list.to_excel(output_path, index=False)


def _fetch_epc_records(asset_list):
    """Look up the newest EPC for each asset-list row; return one row per (address, post code)."""
    records = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        searcher = SearchEpc(
            address1=str(home["Address Line 1"]),
            postcode=home["Post Code"],
            uprn=home["Property Reference"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            property_type=None,
            fast=True,
        )
        # Force the skipping of estimating the EPC
        searcher.ordnance_survey_client.property_type = None
        searcher.ordnance_survey_client.built_form = None
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            continue
        records.append({
            "asset_list_address": home["Address Line 1"],
            "asset_list_postcode": home["Post Code"],
            **searcher.newest_epc,
        })

    # An explicit column list keeps the frame well-formed even when no EPC was
    # found at all, or when a record lacks one of the expected fields.
    epc_df = pd.DataFrame(records, columns=_EPC_KEYS + _EPC_FIELDS)
    # Repeated asset-list rows would otherwise multiply in the left join.
    return epc_df.drop_duplicates(subset=_EPC_KEYS, keep="first")


def _row_number_of_floors(row):
    """Estimate the number of floors from the EPC property type; None when the type is missing."""
    property_type = row["EPC Property Type"]
    if pd.isnull(property_type):
        return None
    return estimate_number_of_floors(property_type=property_type)


def _row_perimeter(row):
    """Estimate the per-floor perimeter; None when no EPC matched or the floor count is unusable."""
    floors = row["Estimated Number of Floors"]
    if pd.isnull(row["uprn"]) or pd.isnull(floors) or floors == 0:
        return None
    return estimate_perimeter(
        floor_area=row["Property Floor Area"] / floors,
        num_rooms=row["Number of Habitable Rooms"] / floors,
    )


def _row_external_wall_area(row):
    """Estimate the external wall area; None when no EPC matched or no perimeter is available."""
    perimeter = row["Estimated Perimeter (m)"]
    if pd.isnull(row["uprn"]) or pd.isnull(perimeter):
        return None
    return estimate_external_wall_area(
        num_floors=row["Estimated Number of Floors"],
        floor_height=_floor_height(row["Property Floor Height"]),
        perimeter=perimeter,
        built_form=row["Archetype"],
    )


def _row_roof_insulation_thickness(row):
    """Extract the insulation thickness from the EPC roof description; None when no EPC matched."""
    if pd.isnull(row["uprn"]):
        return None
    return RoofAttributes(description=row["Roof Construction"]).process()["insulation_thickness"]


def _floor_height(value):
    """Return the EPC floor height as a float, or the default when it is missing, empty or zero."""
    # NaN is truthy, so a plain truthiness test would let it through as float("nan").
    if pd.isnull(value) or not value:
        return _DEFAULT_FLOOR_HEIGHT
    return float(value)