From 7a275deb6df6a231bde60d64d78ba3b04ab32f38 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 23 Apr 2024 17:12:39 +0100 Subject: [PATCH] route march code --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/customers/guiness/route_march.py | 98 ++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 etl/customers/guiness/route_march.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/customers/guiness/route_march.py b/etl/customers/guiness/route_march.py new file mode 100644 index 00000000..28f350d3 --- /dev/null +++ b/etl/customers/guiness/route_march.py @@ -0,0 +1,98 @@ +import os + +import pandas as pd +from tqdm import tqdm + +from dotenv import load_dotenv +from utils.s3 import read_excel_from_s3 +from backend.SearchEpc import SearchEpc +from epc_api.client import EpcClient +from utils.s3 import save_csv_to_s3 + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + + +def app(): + """ + This app is satisying an adhoc request to retrieve EPC data for properties owned by Guiness, to help plan the + route march + + These properties were provided to us by Ecosurv + :return: + """ + asset_list = read_excel_from_s3( + bucket_name="retrofit-datalake-dev", + file_key="customers/guiness/TGP CW Properties PV.xlsx", + header_row=0 + ) + + epc_data = [] + for _, guiness_property in tqdm(asset_list.iterrows(), total=len(asset_list)): + + searcher = SearchEpc( + address1=str(guiness_property["Address"]), + postcode=guiness_property["POSTCODES"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + continue + + epc = { + "asset_list_address": guiness_property["Address"], + "asset_list_postcode": guiness_property["POSTCODES"], + **searcher.newest_epc.copy() + } + + epc_data.append(epc) + + epc_df = pd.DataFrame(epc_data) + + # Retrieve just the data we need + epc_df = epc_df[ + [ + "asset_list_address", + "asset_list_postcode", + "uprn", + "property-type", + "built-form", + "inspection-date", + "current-energy-rating", + "current-energy-efficiency", + "roof-description", + "walls-description", + "transaction-type" + ] + ] + + asset_list = asset_list.merge( + epc_df, how="left", left_on=["Address", "POSTCODES"], right_on=["asset_list_address", "asset_list_postcode"] + ) + + # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated + asset_list = asset_list.drop_duplicates(subset=["Address", "POSTCODES"]) + asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_postcode"]) + + # Rename the columns + asset_list = asset_list.rename(columns={ + "property-type": "Property Type", + "built-form": "Archetype", + "inspection-date": "Last EPC Inspection Date", + "current-energy-rating": "Last survey EPC Rating", + "current-energy-efficiency": "Last survey SAP Score", + "roof-description": "Roof Construction", + "walls-description": "Wall Construction", + "transaction-type": "Last EPC Reason" + }) + + # Store as an excel + filename = "Guiness EPC data.xlsx" + asset_list.to_excel(filename, index=False)