Model/etl/customers/goldman/epc_f_g_properties.py
2024-04-26 14:06:48 +01:00

25 lines
885 B
Python

import pandas as pd
def app(
    input_csv: str = "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
    output_xlsx: str = "Birmingham EPC F & G Properties.xlsx",
    ratings=("G", "F"),
) -> None:
    """
    Pull the list of EPC G & F properties in Birmingham for Goldman Sachs.

    Reads an EPC certificates CSV, keeps only the most recently lodged
    certificate for each UPRN, filters those to the requested energy ratings
    and writes the result to an Excel workbook.

    Args:
        input_csv: Path of the certificates CSV to read. It must contain the
            columns ``UPRN``, ``LODGEMENT_DATETIME`` and
            ``CURRENT_ENERGY_RATING``. Defaults to the Birmingham extract.
        output_xlsx: Path of the Excel file to write.
        ratings: Values of ``CURRENT_ENERGY_RATING`` to keep.

    Returns:
        None. The filtered table is written to ``output_xlsx``.
    """
    epc_data = pd.read_csv(input_csv, low_memory=False)

    # Rows without a UPRN cannot be de-duplicated per property, so drop them.
    # ``assign`` works on a fresh copy, which avoids pandas'
    # SettingWithCopyWarning that column assignment on a filtered frame triggers.
    epc_data = epc_data.dropna(subset=["UPRN"]).assign(
        # Explicit 64-bit integers so the conversion does not depend on the
        # platform's default integer width before the UPRN becomes a string.
        UPRN=lambda df: df["UPRN"].astype("int64").astype(str),
        LODGEMENT_DATETIME=lambda df: pd.to_datetime(
            df["LODGEMENT_DATETIME"], format="mixed"
        ),
    )

    # Get the newest EPC for each UPRN. We use LODGEMENT_DATETIME as a proxy
    # for this: sort newest first, then keep the first row seen per UPRN.
    # A stable sort keeps the choice deterministic when timestamps tie.
    epc_data = epc_data.sort_values(
        "LODGEMENT_DATETIME", ascending=False, kind="stable"
    ).drop_duplicates("UPRN")

    # Keep only properties whose latest certificate has a requested rating
    # (G & F by default).
    epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(list(ratings))]

    # Save as an Excel workbook.
    epc_data.to_excel(output_xlsx, index=False)