mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
77 lines
2.9 KiB
Python
77 lines
2.9 KiB
Python
import inspect
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from tqdm import tqdm
|
|
from etl.epc.settings import EARLIEST_EPC_DATE
|
|
from etl.spatial.OpenUprnClient import OpenUprnClient
|
|
|
|
# File in which this script's code was defined, resolved via a throwaway
# lambda. It is not referenced again in the visible part of the script.
src_file_path = inspect.getfile(lambda: None)

# Root of the bulk EPC download. Each immediate sub-directory is expected to
# hold a "certificates.csv" file; plain files at the top level are ignored.
EPC_DIRECTORY = Path("/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates")
epc_directories = [
    child
    for child in EPC_DIRECTORY.iterdir()
    if child.is_dir()
]
|
|
|
|
# Per-directory summaries (postal_region, count) collected for the final
# re-aggregation after the loop.
aggregation = []

# For every EPC directory: load its certificates, narrow them down to London
# homes rated D-G that are not on mains gas, drop properties with planning
# restrictions, keep privately rented / owner-occupied homes, and count what
# is left per postal region.
for directory in tqdm(epc_directories):
    data = pd.read_csv(directory / "certificates.csv", low_memory=False)

    # Rename the columns to the same format as the api returns
    data.columns = [c.replace("_", "-").lower() for c in data.columns]

    # Keep only certificates whose post town mentions London
    data = data[data["posttown"].str.contains("London", case=False, na=False)]
    if data.empty:
        continue

    # Keep only certificates lodged on or after the earliest allowed date
    data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]

    # Drop rows without a UPRN. The explicit copy makes the column assignment
    # below act on an independent frame rather than on a filtered slice
    # (avoids pandas' SettingWithCopyWarning).
    data = data[data["uprn"].notna()].copy()
    data["uprn"] = data["uprn"].astype(int)

    # Take just the newest EPC per uprn, based on lodgement-date
    data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")

    # Take EPC D and below; copy again because a new column is added next
    data = data[data["current-energy-rating"].isin(["D", "E", "F", "G"])].copy()

    # The postal region is the part of the postcode before the first space
    data["postal_region"] = data["postcode"].str.split(" ").str[0]

    # Take homes that don't have a gas boiler
    off_gas = data[~data["main-fuel"].str.contains("mains gas", case=False, na=False)]
    if off_gas.empty:
        continue

    # Remove properties with conservation area issues: look up the spatial
    # flags for the remaining UPRNs and attach them to the EPC rows.
    uprns = off_gas["uprn"].unique()
    # NOTE(review): "retrofit-data-dev" is presumably the data source/bucket
    # the client reads from - confirm against OpenUprnClient.
    ca_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev")
    off_gas = off_gas.merge(
        ca_data[["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]].rename(
            columns={"UPRN": "uprn"}
        ),
        how="left",
        on="uprn",
    )

    # Remove any restricted units. The comparison is deliberately "!= True":
    # rows with no spatial match are NaN after the left join, compare unequal
    # to True, and are therefore kept.
    off_gas = off_gas[
        (off_gas["conservation_status"] != True)  # noqa: E712 - element-wise
        & (off_gas["is_listed_building"] != True)  # noqa: E712
        & (off_gas["is_heritage_building"] != True)  # noqa: E712
    ]

    # Keep privately rented and owner-occupied homes (both spellings listed
    # here are matched exactly)
    off_gas = off_gas[
        off_gas["tenure"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
    ]

    # Number of remaining homes per postal region for this directory
    region_summary = off_gas.groupby("postal_region").size().reset_index(name="count")
    aggregation.append(region_summary)
|
|
|
|
# Combine the per-directory summaries. pd.concat raises ValueError on an empty
# list, which happens when every directory was skipped by the loop; fall back
# to an empty frame with the expected columns so the export below still
# produces a (header-only) workbook instead of crashing.
if aggregation:
    postal_region_aggregation = pd.concat(aggregation)
else:
    postal_region_aggregation = pd.DataFrame(
        {
            "postal_region": pd.Series(dtype="object"),
            "count": pd.Series(dtype="int64"),
        }
    )

# Re-aggregate: the same postal region can appear in several directories, so
# sum the per-directory counts, then order regions from most to fewest homes
# and switch to the presentation column names.
postal_region_aggregation = (
    postal_region_aggregation.groupby("postal_region")["count"]
    .sum()
    .reset_index()
    .sort_values("count", ascending=False)
    .rename(columns={"postal_region": "Postcode Region", "count": "Number of Homes"})
)

# Write the summary table to the customer workbook (no index column)
postal_region_aggregation.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/GLA/Off Gas EPC D-G Postal Regions - without conservation "
    "area.xlsx",
    index=False,
)
|