mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
80 lines
2.9 KiB
Python
80 lines
2.9 KiB
Python
import os
|
|
from tqdm import tqdm
|
|
import pandas as pd
|
|
from dotenv import load_dotenv
|
|
from backend.SearchEpc import SearchEpc
|
|
from backend.app.utils import sap_to_epc
|
|
|
|
load_dotenv(dotenv_path="backend/.env")
|
|
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
|
|
|
|
|
def app():
|
|
"""
|
|
This script will retrieve EPC data, for postcodes and produce statistics on the SAP Score
|
|
:return:
|
|
"""
|
|
|
|
source_file = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Addresses - SFR rents.xlsx")
|
|
source_file["row_id"] = source_file.index
|
|
# Split out the town, which is the final portion of the string, separated by commas
|
|
source_file["Town"] = source_file["Address"].apply(lambda x: x.split(" ")[-1].strip() if not pd.isnull(x) else None)
|
|
source_file["Address"] = source_file["Address"].apply(
|
|
lambda x: " ".join(x.split(" ")[:-1]).strip() if not pd.isnull(x) else None
|
|
)
|
|
|
|
unique_postcodes = source_file[["Address", "Postcode"]].drop_duplicates()
|
|
|
|
# for each postcode, pull EPC data
|
|
collected_data = []
|
|
no_data_found = []
|
|
no_data_after_filters = []
|
|
for _, config in tqdm(unique_postcodes.iterrows(), total=len(unique_postcodes)):
|
|
address1 = config["Address"] if not pd.isnull(config["Address"]) else ""
|
|
searcher = SearchEpc(
|
|
postcode=config["Postcode"],
|
|
address1=address1,
|
|
auth_token=EPC_AUTH_TOKEN,
|
|
os_api_key=""
|
|
)
|
|
while True:
|
|
params = {
|
|
"postcode": config["Postcode"],
|
|
"address": address1,
|
|
}
|
|
results = searcher.client.domestic.search(params=params, size=10000)
|
|
if not results:
|
|
# We strip back address1
|
|
address1 = " ".join(address1.split(" ")[:-1])
|
|
if not address1:
|
|
break
|
|
else:
|
|
break
|
|
|
|
if not results:
|
|
no_data_found.append(config)
|
|
continue
|
|
|
|
data = pd.DataFrame(results["rows"])
|
|
|
|
data["current-energy-efficiency"] = data["current-energy-efficiency"].astype(int)
|
|
# Take EPCs post 2023
|
|
data["lodgement-date"] = pd.to_datetime(data["lodgement-date"], errors="coerce")
|
|
data = data[data["lodgement-date"] >= "2023-01-01"]
|
|
# Take private nrentals
|
|
data = data[data["tenure"].isin(["rental (private)", "Rented (private)"])]
|
|
|
|
if data.empty:
|
|
no_data_after_filters.append(config)
|
|
continue
|
|
|
|
agg = data.groupby(["property-type", "built-form"])["current-energy-efficiency"].mean().reset_index()
|
|
agg = agg.rename(columns={"current-energy-efficiency": "Average SAP"})
|
|
agg["Average EPC"] = agg["Average SAP"].apply(sap_to_epc)
|
|
agg.insert(0, "Postcode", config["Postcode"])
|
|
agg.insert(0, "Address", address1)
|
|
|
|
collected_data.append(agg)
|
|
|
|
collected_df = pd.concat(collected_data)
|
|
collected_df.to_csv("EPC Averages SFR.csv", index=False)
|