Model/etl/customers/vander_elliot/initial_scoping.py
Khalim Conn-Kowlessar fb6ab43b76 minor initial scoping
2024-04-18 11:07:15 +01:00

23 lines
866 B
Python

import pandas as pd
from utils.s3 import save_csv_to_s3
def app(
    csv_path="local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
    max_age_years=1,
):
    """Find properties whose latest EPC is rated F or G and was lodged recently.

    Loads an EPC certificates CSV (by default the Birmingham extract), keeps
    only the most recently lodged certificate per UPRN, and returns those
    whose current energy rating is F or G and whose lodgement date falls
    within the last ``max_age_years`` years.

    Args:
        csv_path: Path (or any other source accepted by ``pandas.read_csv``)
            of the certificates CSV. It must contain the columns ``UPRN``,
            ``LODGEMENT_DATE`` and ``CURRENT_ENERGY_RATING``.
        max_age_years: Size of the look-back window in years. A year is
            approximated as 365 days.

    Returns:
        pandas.DataFrame: One row per qualifying UPRN, ordered from the most
        recent lodgement date to the oldest. ``len(result)`` is the number
        of qualifying properties.
    """
    epc_data = pd.read_csv(csv_path, low_memory=False)

    # Filter on entries where we have a UPRN
    epc_data = epc_data[epc_data["UPRN"].notna()]

    # Parse the dates with ``assign`` rather than item assignment: the frame
    # above is the result of a boolean filter, and writing a column into it
    # in place raises pandas' SettingWithCopyWarning.
    epc_data = epc_data.assign(
        LODGEMENT_DATE=pd.to_datetime(epc_data["LODGEMENT_DATE"])
    )

    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for
    # this: after sorting newest-first, drop_duplicates keeps the first
    # (i.e. most recent) row of every UPRN.
    epc_data = epc_data.sort_values(
        "LODGEMENT_DATE", ascending=False
    ).drop_duplicates("UPRN")

    # The rating filter must run after de-duplication so that a property
    # whose latest certificate is better than F/G is not counted on the
    # strength of an older certificate.
    epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["F", "G"])]

    # Only keep certificates lodged inside the look-back window.
    cutoff = pd.Timestamp.now() - pd.DateOffset(days=int(max_age_years * 365))
    return epc_data[epc_data["LODGEMENT_DATE"] >= cutoff]