import os from tqdm import tqdm import pandas as pd from dotenv import load_dotenv from backend.SearchEpc import SearchEpc from backend.app.utils import sap_to_epc load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") def app(): """ This script will retrieve EPC data, for postcodes and produce statistics on the SAP Score :return: """ source_file = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Addresses - SFR rents.xlsx") source_file["row_id"] = source_file.index # Split out the town, which is the final portion of the string, separated by commas source_file["Town"] = source_file["Address"].apply(lambda x: x.split(" ")[-1].strip() if not pd.isnull(x) else None) source_file["Address"] = source_file["Address"].apply( lambda x: " ".join(x.split(" ")[:-1]).strip() if not pd.isnull(x) else None ) unique_postcodes = source_file[["Address", "Postcode"]].drop_duplicates() # for each postcode, pull EPC data collected_data = [] no_data_found = [] no_data_after_filters = [] for _, config in tqdm(unique_postcodes.iterrows(), total=len(unique_postcodes)): address1 = config["Address"] if not pd.isnull(config["Address"]) else "" searcher = SearchEpc( postcode=config["Postcode"], address1=address1, auth_token=EPC_AUTH_TOKEN, os_api_key="" ) while True: params = { "postcode": config["Postcode"], "address": address1, } results = searcher.client.domestic.search(params=params, size=10000) if not results: # We strip back address1 address1 = " ".join(address1.split(" ")[:-1]) if not address1: break else: break if not results: no_data_found.append(config) continue data = pd.DataFrame(results["rows"]) data["current-energy-efficiency"] = data["current-energy-efficiency"].astype(int) # Take EPCs post 2023 data["lodgement-date"] = pd.to_datetime(data["lodgement-date"], errors="coerce") data = data[data["lodgement-date"] >= "2023-01-01"] # Take private nrentals data = data[data["tenure"].isin(["rental (private)", "Rented (private)"])] if data.empty: no_data_after_filters.append(config) continue agg = data.groupby(["property-type", "built-form"])["current-energy-efficiency"].mean().reset_index() agg = agg.rename(columns={"current-energy-efficiency": "Average SAP"}) agg["Average EPC"] = agg["Average SAP"].apply(sap_to_epc) agg.insert(0, "Postcode", config["Postcode"]) agg.insert(0, "Address", address1) collected_data.append(agg) collected_df = pd.concat(collected_data) collected_df.to_csv("EPC Averages SFR.csv", index=False)