Model/etl/customers/guiness/route_march.py
Khalim Conn-Kowlessar 7a275deb6d route march code
2024-04-23 17:12:39 +01:00

98 lines
3 KiB
Python

import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3
# Load the backend's .env file first so the settings below can be read from
# the environment.
load_dotenv(dotenv_path="backend/.env")

# Token passed to SearchEpc as ``auth_token``; None when the variable is unset.
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")
# EPC fields kept in the output, listed after the two asset-list key columns
# that are used to join the EPC rows back onto the asset list.
_EPC_OUTPUT_COLUMNS = [
    "asset_list_address",
    "asset_list_postcode",
    "uprn",
    "property-type",
    "built-form",
    "inspection-date",
    "current-energy-rating",
    "current-energy-efficiency",
    "roof-description",
    "walls-description",
    "transaction-type",
]

# Raw EPC field name -> column heading used in the delivered spreadsheet.
_EPC_COLUMN_HEADINGS = {
    "property-type": "Property Type",
    "built-form": "Archetype",
    "inspection-date": "Last EPC Inspection Date",
    "current-energy-rating": "Last survey EPC Rating",
    "current-energy-efficiency": "Last survey SAP Score",
    "roof-description": "Roof Construction",
    "walls-description": "Wall Construction",
    "transaction-type": "Last EPC Reason",
}


def _find_newest_epc(address, postcode):
    """Search for one property and return its newest EPC, or None if none was found."""
    searcher = SearchEpc(
        address1=str(address),
        postcode=postcode,
        auth_token=EPC_AUTH_TOKEN,
        os_api_key="",
        property_type=None,
        fast=True,
    )
    # Force the skipping of estimating the EPC
    searcher.ordnance_survey_client.property_type = None
    searcher.ordnance_survey_client.built_form = None
    searcher.find_property(skip_os=True)
    return searcher.newest_epc


def app():
    """
    Satisfy an ad hoc request to retrieve EPC data for properties owned by Guiness, to help plan
    the route march.

    These properties were provided to us by Ecosurv.

    The asset list is read from S3, the newest EPC is looked up for every distinct
    (Address, POSTCODES) pair, a fixed set of EPC fields is joined back onto the asset list,
    and the result is written to "Guiness EPC data.xlsx" in the current working directory.
    Properties without an EPC are kept in the output with empty EPC columns.

    :return: None
    """
    asset_list = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/guiness/TGP CW Properties PV.xlsx",
        header_row=0,
    )

    # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated.
    # Doing this before the lookups avoids searching the same property twice and keeps
    # the merge below from multiplying rows for duplicated keys.
    asset_list = asset_list.drop_duplicates(subset=["Address", "POSTCODES"])

    epc_data = []
    properties = zip(asset_list["Address"], asset_list["POSTCODES"])
    for address, postcode in tqdm(properties, total=len(asset_list)):
        newest_epc = _find_newest_epc(address, postcode)
        if newest_epc is None:
            continue
        epc_data.append(
            {
                "asset_list_address": address,
                "asset_list_postcode": postcode,
                **newest_epc,
            }
        )

    # Retrieve just the data we need. reindex (rather than [[...]] selection) creates any
    # missing column as empty, so the run still completes when no EPC was found at all or
    # when none of the returned records carries one of the fields.
    epc_df = pd.DataFrame(epc_data).reindex(columns=_EPC_OUTPUT_COLUMNS)

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        left_on=["Address", "POSTCODES"],
        right_on=["asset_list_address", "asset_list_postcode"],
    )
    # The join keys duplicate Address / POSTCODES, so they are not needed in the output.
    asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_postcode"])

    # Rename the columns
    asset_list = asset_list.rename(columns=_EPC_COLUMN_HEADINGS)

    # Store as an excel
    filename = "Guiness EPC data.xlsx"
    asset_list.to_excel(filename, index=False)