import os

import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv

from backend.SearchEpc import SearchEpc
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
    estimate_perimeter,
    estimate_external_wall_area,
    estimate_number_of_floors,
)

load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# Default locations used when app() is called without arguments.
_DEFAULT_INPUT_PATH = "/Users/khalimconn-kowlessar/Downloads/UNITAS BUNGALOWS - EPC DATA PULL.xlsx"
# NOTE(review): "30tg" looks like a typo for "30th"; left unchanged so the
# output file name stays what downstream users already expect.
_DEFAULT_OUTPUT_PATH = "UNITAS BUNGALOWS - EPC DATA PULL - May 30tg 2024.xlsx"

# Floor height used when the EPC record carries no usable value.
_DEFAULT_FLOOR_HEIGHT = 2.5

# Asset-list columns identifying a property, and the helper columns that carry
# the same values on the EPC side so the two frames can be merged.
_ASSET_KEY_COLUMNS = ["Address Line 1", "Post Code"]
_EPC_KEY_COLUMNS = ["asset_list_address", "asset_list_postcode"]

# EPC fields kept in the output, in output order.
_EPC_FIELDS = [
    "uprn",
    "property-type",
    "built-form",
    "inspection-date",
    "current-energy-rating",
    "current-energy-efficiency",
    "roof-description",
    "walls-description",
    "transaction-type",
    # New fields needed
    "secondheat-description",
    "total-floor-area",
    "construction-age-band",
    "floor-height",
    "number-habitable-rooms",
    "mainheat-description",
]

# EPC field name -> column heading used in the output spreadsheet.
_COLUMN_RENAMES = {
    "inspection-date": "Date of last EPC",
    "current-energy-efficiency": "SAP score on register",
    "current-energy-rating": "EPC rating on register",
    "property-type": "EPC Property Type",
    "built-form": "Archetype",
    "total-floor-area": "Property Floor Area",
    "construction-age-band": "Property Age Band",
    "floor-height": "Property Floor Height",
    "number-habitable-rooms": "Number of Habitable Rooms",
    "walls-description": "Wall Construction",
    "roof-description": "Roof Construction",
    "mainheat-description": "Heating Type",
    "secondheat-description": "Secondary Heating",
    "transaction-type": "Reason for last EPC",
}


def _fetch_epc_records(asset_list: pd.DataFrame) -> list:
    """Return one dict per asset-list row for which an EPC was found.

    Each dict holds the asset list's own address and postcode (used later as
    merge keys) followed by every field of the newest EPC for that property.
    Rows with no EPC are skipped.
    """
    records = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        searcher = SearchEpc(
            address1=str(home["Address Line 1"]),
            postcode=home["Post Code"],
            uprn=home["Property Reference"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            property_type=None,
            fast=True,
        )
        # Force the skipping of estimating the EPC
        searcher.ordnance_survey_client.property_type = None
        searcher.ordnance_survey_client.built_form = None
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            continue

        records.append(
            {
                "asset_list_address": home["Address Line 1"],
                "asset_list_postcode": home["Post Code"],
                **searcher.newest_epc,
            }
        )
    return records


def _floor_height(value) -> float:
    """Return *value* as a float, or the default when it is missing/empty."""
    # NaN is truthy, so it needs its own check in addition to the falsy test
    # that catches empty strings, None and 0.
    if pd.isnull(value) or not value:
        return _DEFAULT_FLOOR_HEIGHT
    return float(value)


def _number_of_floors(row: pd.Series):
    """Estimate the number of floors from the EPC property type, if known."""
    property_type = row["EPC Property Type"]
    if pd.isnull(property_type):
        return None
    return estimate_number_of_floors(property_type=property_type)


def _perimeter(row: pd.Series):
    """Estimate the per-floor perimeter, or None when it cannot be derived."""
    floors = row["Estimated Number of Floors"]
    # Without a floor count the per-floor division below is impossible.
    if pd.isnull(row["uprn"]) or pd.isnull(floors) or floors == 0:
        return None
    return estimate_perimeter(
        floor_area=row["Property Floor Area"] / floors,
        num_rooms=row["Number of Habitable Rooms"] / floors,
    )


def _external_wall_area(row: pd.Series):
    """Estimate the external wall area, or None when inputs are missing."""
    perimeter = row["Estimated Perimeter (m)"]
    if pd.isnull(row["uprn"]) or pd.isnull(perimeter):
        return None
    return estimate_external_wall_area(
        num_floors=row["Estimated Number of Floors"],
        floor_height=_floor_height(row["Property Floor Height"]),
        perimeter=perimeter,
        built_form=row["Archetype"],
    )


def _roof_insulation_thickness(row: pd.Series):
    """Extract the insulation thickness from the EPC roof description."""
    if pd.isnull(row["uprn"]):
        return None
    return RoofAttributes(description=row["Roof Construction"]).process()["insulation_thickness"]


def app(input_path: str = _DEFAULT_INPUT_PATH, output_path: str = _DEFAULT_OUTPUT_PATH):
    """
    Pull EPC data for a list of properties and save the enriched list to Excel.

    Reads the asset list spreadsheet, looks up the newest EPC for every row,
    attaches a selection of EPC fields to the matching rows, derives a few
    estimates (number of floors, perimeter, external wall area, roof
    insulation thickness) and writes the result to ``output_path``.

    :param input_path: Excel file holding the asset list. It must contain the
        columns "Address Line 1", "Post Code" and "Property Reference".
    :param output_path: Excel file the enriched asset list is written to.
    :return: None
    """
    # Alternative source (disabled):
    # asset_list = read_excel_from_s3(
    #     bucket_name="retrofit-datalake-dev",
    #     file_key="customers/guiness/TGP CW Properties PV.xlsx",
    #     header_row=0
    # )
    asset_list = pd.read_excel(input_path, header=0)

    # Passing `columns` keeps only the fields we need and still yields a frame
    # with every expected column when no EPC was found at all, or when a record
    # lacks one of the fields (plain `df[[...]]` raises KeyError in both cases).
    epc_df = pd.DataFrame(
        _fetch_epc_records(asset_list),
        columns=_EPC_KEY_COLUMNS + _EPC_FIELDS,
    )
    # A property listed twice must not multiply rows in the merge below.
    epc_df = epc_df.drop_duplicates(subset=_EPC_KEY_COLUMNS)

    # Match on address AND postcode: the first address line alone is not
    # unique across postcodes and would attach EPCs to the wrong rows.
    asset_list = asset_list.merge(
        epc_df,
        how="left",
        left_on=_ASSET_KEY_COLUMNS,
        right_on=_EPC_KEY_COLUMNS,
    )
    asset_list = asset_list.drop(columns=_EPC_KEY_COLUMNS)

    # Rename the columns
    asset_list = asset_list.rename(columns=_COLUMN_RENAMES)

    asset_list["Estimated Number of Floors"] = asset_list.apply(_number_of_floors, axis=1)

    # Empty strings and other unparsable values become NaN rather than
    # aborting the whole run with a ValueError.
    for column in ("Property Floor Area", "Number of Habitable Rooms"):
        asset_list[column] = pd.to_numeric(asset_list[column], errors="coerce").astype(float)

    asset_list["Estimated Perimeter (m)"] = asset_list.apply(_perimeter, axis=1)

    # NOTE(review): the heading says "Perimeter (m)" but the value comes from
    # estimate_external_wall_area; heading kept as-is for existing consumers.
    asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(_external_wall_area, axis=1)

    asset_list["Roof Insulation Thickness"] = asset_list.apply(_roof_insulation_thickness, axis=1)

    # Store as an excel
    asset_list.to_excel(output_path, index=False)