mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
134 lines
4.1 KiB
Python
134 lines
4.1 KiB
Python
import os
|
|
|
|
import pandas as pd
|
|
from tqdm import tqdm
|
|
|
|
from dotenv import load_dotenv
|
|
from utils.s3 import read_excel_from_s3
|
|
from backend.SearchEpc import SearchEpc
|
|
from epc_api.client import EpcClient
|
|
from utils.s3 import save_csv_to_s3
|
|
|
|
# Read the backend's .env file first so the lookup below can see its values.
load_dotenv(dotenv_path="backend/.env")

# Token passed as `auth_token` to SearchEpc; None when the variable is not set.
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")
|
|
|
|
|
|
def route_march_may_2024():
    """
    Pull supplementary EPC data for the route march that is expected to happen in May 2024.

    The proposed route-march asset list is read from S3, the newest EPC is looked up for
    every unit, a selection of EPC fields is merged onto the asset list, and the result
    is written to the local file "Livewest EPC data.xlsx".

    Units for which no EPC can be found keep empty EPC columns in the output. If no EPC
    is found for any unit at all, the output still contains the (empty) EPC columns
    rather than failing.

    This code was authored on the 30th April 2024.
    """
    asset_list = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx",
        header_row=0,
    )

    epc_data = []
    for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)):
        address_parts = [
            unit["NO"],
            unit["ADDRESS 1"],
            unit["ADDRESS 2"],
            unit["ADDRESS 3"],
            unit["POSTCODE"],
        ]
        # Missing address components are left out of the free-text address.
        full_address = ", ".join(
            str(part).strip() for part in address_parts if not pd.isnull(part)
        )

        house_no = str(unit["NO"])
        newest_epc = _find_newest_epc(house_no, unit["POSTCODE"], full_address)
        if newest_epc is None:
            # We try with a different address 1
            newest_epc = _find_newest_epc(
                _simplify_house_no(house_no), unit["POSTCODE"], full_address
            )

        if newest_epc is None:
            continue

        epc_data.append(
            {
                # Keep the asset-list keys so the EPC can be merged back onto its row.
                "asset_list_house_no": unit["NO"],
                "asset_list_address1": unit["ADDRESS 1"],
                "asset_list_postcode": unit["POSTCODE"],
                **newest_epc,
            }
        )

    # Retrieve just the data we need. Passing `columns` to the constructor performs the
    # column selection and, unlike indexing afterwards, also works when `epc_data` is
    # empty (no EPC found for any unit) instead of raising a KeyError.
    epc_df = pd.DataFrame(
        epc_data,
        columns=[
            "asset_list_house_no",
            "asset_list_address1",
            "asset_list_postcode",
            "uprn",
            "address",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
        ],
    ).rename(columns={"address": "Matched EPC Address"})

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        left_on=["NO", "ADDRESS 1", "POSTCODE"],
        right_on=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"],
    )

    # Repeated keys on either side multiply rows in the merge; keep one row per unit.
    asset_list = asset_list.drop_duplicates(subset=["NO", "ADDRESS 1", "POSTCODE"])
    asset_list = asset_list.drop(
        columns=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"]
    )

    # Rename the columns
    asset_list = asset_list.rename(
        columns={
            "property-type": "Property Type",
            "built-form": "Archetype",
            "inspection-date": "Last EPC Inspection Date",
            "current-energy-rating": "Last survey EPC Rating",
            "current-energy-efficiency": "Last survey SAP Score",
            "roof-description": "Roof Construction",
            "walls-description": "Wall Construction",
            "transaction-type": "Last EPC Reason",
        }
    )

    # Store as an excel
    filename = "Livewest EPC data.xlsx"
    asset_list.to_excel(filename, index=False)


def _find_newest_epc(address1, postcode, full_address):
    """Run one EPC search for the given address and return `newest_epc` (None if no match)."""
    searcher = SearchEpc(
        address1=address1,
        postcode=postcode,
        auth_token=EPC_AUTH_TOKEN,
        os_api_key="",
        property_type=None,
        fast=True,
        full_address=full_address,
    )
    # Force the skipping of estimating the EPC
    searcher.ordnance_survey_client.property_type = None
    searcher.ordnance_survey_client.built_form = None

    searcher.find_property(skip_os=True)
    return searcher.newest_epc


def _simplify_house_no(house_no):
    """Return the lower-cased house number with "flat"/"ft"/"t" markers removed, for the retry."""
    # NOTE(review): the last replace removes every "t", not only a leading marker —
    # confirm this is intended for house numbers that legitimately contain a "t".
    return (
        house_no.lower()
        .replace("flat", "")
        .replace("ft", "")
        .replace("t", "")
        .strip()
    )
|