import os import time import pandas as pd from tqdm import tqdm from dotenv import load_dotenv from utils.s3 import read_excel_from_s3 from backend.SearchEpc import SearchEpc from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes from recommendations.recommendation_utils import ( estimate_perimeter, estimate_external_wall_area, estimate_number_of_floors ) load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") def get_data(asset_list): epc_data = [] errors = [] for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)): try: postcode = home["Postcode"] address1 = home["address1"].split(",")[0] full_address = home["Address"] searcher = SearchEpc( address1=str(address1), postcode=postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="", property_type=None, fast=True, full_address=full_address, max_retries=5 ) # Force the skipping of estimating the EPC searcher.ordnance_survey_client.property_type = None searcher.ordnance_survey_client.built_form = None searcher.find_property(skip_os=True) if searcher.newest_epc is None: continue # Look for EPC recommendatons try: property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"]) except: property_recommendations = {"rows": []} epc = { "row_id": home["row_id"], **searcher.newest_epc.copy(), "recommendations": property_recommendations["rows"] } epc_data.append(epc) except Exception as e: errors.append(home["row_id"]) time.sleep(5) return epc_data, errors def app(): """ This app is EPC pulling data for some properties owned by Livewest Data request contents: Date of last EPC Reason for EPC SAP score on register Property Type Property Area Property Age Any Dimensions (HLP,PW,RH) Property Wall Construction Heating Type Secondary Heating Loft Insulation Depth Additional if possible: Heat loss calculations EPC recommendations Property UPRN """ asset_list = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/Southend Planned programme.xlsx", header=0, sheet_name="Planned RM" ) asset_list["row_id"] = asset_list.index asset_list["address1"] = asset_list["Address"].str.split(",").str[0] epc_data, errors = get_data(asset_list) # We now retrieve any failed properties asset_list_failed = asset_list[asset_list["row_id"].isin(errors)] epc_data_failed, _ = get_data(asset_list_failed) # Append the failed data to the main data epc_data.extend(epc_data_failed) epc_df = pd.DataFrame(epc_data) # We expand out the recommendations recommendations_df = epc_df[["row_id", "recommendations"]] unique_recommendations = set() for _, row in recommendations_df.iterrows(): unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]]) columns = ["row_id"] + list(unique_recommendations) transformed_data = [] for _, row in recommendations_df.iterrows(): # Initialize a dictionary for this row with False for all recommendations row_data = {col: False for col in columns} row_data["row_id"] = row["row_id"] # Set True for each recommendation present in this row for rec in row["recommendations"]: recommendation_text = rec["improvement-summary-text"] row_data[recommendation_text] = True # Append the row data to transformed_data transformed_data.append(row_data) transformed_df = pd.DataFrame(transformed_data) # Drop the column that is "" transformed_df = transformed_df.drop(columns=[""]) # Retrieve just the data we need epc_df = epc_df[ [ "row_id", "uprn", "property-type", "built-form", "inspection-date", "current-energy-rating", "current-energy-efficiency", "roof-description", "walls-description", "transaction-type", # New fields needed "secondheat-description", "total-floor-area", "construction-age-band", "floor-height", "number-habitable-rooms", "mainheat-description", # "energy-consumption-current", # kwh/m2 "photo-supply", ] ] asset_list = asset_list.merge( epc_df, how="left", on="row_id" ).merge( transformed_df, how="left", on="row_id" ) asset_list = asset_list.drop(columns=["row_id"]) # Rename the columns asset_list = asset_list.rename(columns={ "inspection-date": "Date of last EPC", "current-energy-efficiency": "SAP score on register", "current-energy-rating": "EPC rating on register", "property-type": "Property Type", "built-form": "Archetype", "total-floor-area": "Property Floor Area", "construction-age-band": "Property Age Band", "floor-height": "Property Floor Height", "number-habitable-rooms": "Number of Habitable Rooms", "walls-description": "Wall Construction", "roof-description": "Roof Construction", "mainheat-description": "Heating Type", "secondheat-description": "Secondary Heating", "transaction-type": "Reason for last EPC", "energy-consumption-current": "Heat Demand (kWh/m2)", "photo-supply": "% of the Roof with PV" }) asset_list["Estimated Number of Floors"] = asset_list.apply( lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull( x["Property Type"]) else None, axis=1 ) asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float) # Replace "" value with None asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", None) asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float) asset_list["Estimated Perimeter (m)"] = asset_list.apply( lambda x: estimate_perimeter( floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"], num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"], ), axis=1 ) asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply( lambda x: estimate_external_wall_area( num_floors=x["Estimated Number of Floors"], floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5, perimeter=x["Estimated Perimeter (m)"], built_form=x["Archetype"] ), axis=1 ) asset_list["Roof Insulation Thickness"] = asset_list.apply( lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull( x["Roof Construction"]) else None, axis=1 ) # Store as an excel filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/southend EPC Data pull - 14 Nov " "2024.xlsx") asset_list.to_excel(filename, index=False)