import os

import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv

from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
    estimate_perimeter,
    estimate_external_wall_area,
    estimate_number_of_floors,
)

load_dotenv(dotenv_path="backend/.env")

EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# Default spreadsheet locations; both can be overridden through ``app()``.
DEFAULT_ASSET_LIST_PATH = "/Users/khalimconn-kowlessar/Downloads/Echo4 3.4.24.xlsx"
DEFAULT_OUTPUT_FILENAME = "LHP EPC Data pull.xlsx"

# Floor height used when the EPC record carries no usable (positive) value.
DEFAULT_FLOOR_HEIGHT = 2.5

# Column of the asset list holding the full, comma-separated address.
ADDRESS_COLUMN = "ADDRESS"
# Temporary join key carrying the asset-list address on each EPC record.
MATCH_COLUMN = "asset_list_address"

# EPC fields kept from each record, in the order they appear in the output.
EPC_FIELDS = [
    "uprn",
    "property-type",
    "built-form",
    "inspection-date",
    "current-energy-rating",
    "current-energy-efficiency",
    "roof-description",
    "walls-description",
    "transaction-type",
    # New fields needed
    "secondheat-description",
    "total-floor-area",
    "construction-age-band",
    "floor-height",
    "number-habitable-rooms",
    "mainheat-description",
]

# EPC field name -> column heading used in the exported spreadsheet.
EPC_COLUMN_RENAMES = {
    "inspection-date": "Date of last EPC",
    "current-energy-efficiency": "SAP score on register",
    "current-energy-rating": "EPC rating on register",
    "property-type": "Property Type",
    "built-form": "Archetype",
    "total-floor-area": "Property Floor Area",
    "construction-age-band": "Property Age Band",
    "floor-height": "Property Floor Height",
    "number-habitable-rooms": "Number of Habitable Rooms",
    "walls-description": "Wall Construction",
    "roof-description": "Roof Construction",
    "mainheat-description": "Heating Type",
    "secondheat-description": "Secondary Heating",
    "transaction-type": "Reason for last EPC",
}


def _fetch_epc_records(asset_list: pd.DataFrame) -> list:
    """Look up the newest EPC for every distinct address in the asset list.

    Each address is queried once, so repeated addresses neither trigger
    repeated lookups nor produce duplicate join keys. Blank or non-string
    address cells are skipped because they cannot be split into an address
    line and a postcode.

    :param asset_list: frame with an ``ADDRESS`` column of comma-separated
        addresses; the first part is used as the address line and the last
        part as the postcode.
    :return: list of dicts, one per address with an EPC, each holding the
        EPC fields plus the original address under ``asset_list_address``.
    """
    addresses = asset_list[ADDRESS_COLUMN].dropna().drop_duplicates()

    epc_records = []
    for full_address in tqdm(addresses, total=len(addresses)):
        if not isinstance(full_address, str) or not full_address.strip():
            continue

        address_parts = full_address.split(",")
        searcher = SearchEpc(
            address1=address_parts[0].strip(),
            postcode=address_parts[-1].strip(),
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            property_type=None,
            fast=True,
            full_address=full_address,
        )

        # Force the skipping of estimating the EPC
        searcher.ordnance_survey_client.property_type = None
        searcher.ordnance_survey_client.built_form = None
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            continue

        epc_records.append({MATCH_COLUMN: full_address, **searcher.newest_epc})

    return epc_records


def _build_epc_frame(epc_records: list) -> pd.DataFrame:
    """Turn EPC records into a frame holding only the fields we export.

    Passing ``columns=`` selects the wanted keys and creates any that are
    absent from the records, so an empty result set (or a field missing from
    every record) yields empty columns rather than a ``KeyError``.
    """
    return pd.DataFrame(epc_records, columns=[MATCH_COLUMN, *EPC_FIELDS])


def _add_estimates(asset_list: pd.DataFrame) -> pd.DataFrame:
    """Append the derived geometry and roof-insulation columns.

    Expects the renamed EPC columns ("Property Type", "Property Floor Area",
    "Number of Habitable Rooms", "Property Floor Height", "Archetype" and
    "Roof Construction") to be present. The frame is modified in place and
    also returned.

    NOTE(review): rows without an EPC reach the project helpers with NaN
    inputs; how ``estimate_number_of_floors``, ``estimate_perimeter``,
    ``estimate_external_wall_area`` and ``RoofAttributes`` treat NaN is not
    visible from this file -- confirm.
    """
    asset_list["Estimated Number of Floors"] = asset_list["Property Type"].apply(
        lambda property_type: estimate_number_of_floors(property_type=property_type)
    )

    # Coerce instead of a bare astype(float) so blank or non-numeric EPC
    # values become NaN rather than aborting the whole export.
    for column in ("Property Floor Area", "Number of Habitable Rooms"):
        asset_list[column] = pd.to_numeric(
            asset_list[column], errors="coerce"
        ).astype(float)

    asset_list["Estimated Perimeter (m)"] = [
        estimate_perimeter(
            floor_area=floor_area / num_floors,
            num_rooms=num_rooms / num_floors,
        )
        for floor_area, num_rooms, num_floors in zip(
            asset_list["Property Floor Area"],
            asset_list["Number of Habitable Rooms"],
            asset_list["Estimated Number of Floors"],
        )
    ]

    # Missing, non-numeric or non-positive heights fall back to the default.
    # A plain truthiness test is not enough here because NaN is truthy. The
    # exported "Property Floor Height" column itself is left untouched.
    floor_heights = pd.to_numeric(asset_list["Property Floor Height"], errors="coerce")
    floor_heights = floor_heights.where(floor_heights > 0, DEFAULT_FLOOR_HEIGHT)

    # NOTE(review): the heading says "Perimeter (m)" but the value comes from
    # estimate_external_wall_area; heading kept as-is for downstream users.
    asset_list["Estimated Heat Loss Perimeter (m)"] = [
        estimate_external_wall_area(
            num_floors=num_floors,
            floor_height=float(floor_height),
            perimeter=perimeter,
            built_form=built_form,
        )
        for num_floors, floor_height, perimeter, built_form in zip(
            asset_list["Estimated Number of Floors"],
            floor_heights,
            asset_list["Estimated Perimeter (m)"],
            asset_list["Archetype"],
        )
    ]

    asset_list["Roof Insulation Thickness"] = asset_list["Roof Construction"].apply(
        lambda description: RoofAttributes(description=description).process()[
            "insulation_thickness"
        ]
    )

    return asset_list


def app(
    *,
    asset_list_path: str = DEFAULT_ASSET_LIST_PATH,
    output_filename: str = DEFAULT_OUTPUT_FILENAME,
) -> None:
    """
    Pull EPC data for a list of properties owned by LHP and export it to Excel.

    Reads the asset list, looks up the newest EPC for each address,
    left-joins the selected EPC fields onto the asset list (properties
    without an EPC are kept, with empty EPC columns), adds the estimated
    geometry and roof-insulation columns, and writes the result to
    ``output_filename``.

    :param asset_list_path: Excel file with an ``ADDRESS`` column; the header
        is read from the first row.
    :param output_filename: path of the Excel file to write.
    :return: None
    """
    # The asset list can alternatively be loaded from S3, e.g.
    # read_excel_from_s3(bucket_name="retrofit-datalake-dev",
    # file_key="customers/guiness/TGP CW Properties PV.xlsx", header_row=0).
    asset_list = pd.read_excel(asset_list_path, header=0)

    epc_df = _build_epc_frame(_fetch_epc_records(asset_list))

    # The lookup is keyed on unique addresses, so each asset row matches at
    # most one EPC row; ``validate`` guards that invariant.
    asset_list = asset_list.merge(
        epc_df,
        how="left",
        left_on=[ADDRESS_COLUMN],
        right_on=[MATCH_COLUMN],
        validate="many_to_one",
    )
    asset_list = asset_list.drop(columns=[MATCH_COLUMN])

    # Rename the columns
    asset_list = asset_list.rename(columns=EPC_COLUMN_RENAMES)

    asset_list = _add_estimates(asset_list)

    # Store as an excel
    asset_list.to_excel(output_filename, index=False)