mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
90 lines
3.3 KiB
Python
90 lines
3.3 KiB
Python
import inspect
|
|
import pandas as pd
|
|
from tqdm import tqdm
|
|
from pathlib import Path
|
|
|
|
# Path of this source file. inspect.getfile is handed a throwaway lambda and
# reports the file in which that lambda was defined.
src_file_path = inspect.getfile(lambda: None)

# Root folder of the locally stored EPC certificates, kept alongside this file.
EPC_DIRECTORY = Path(src_file_path).parent.joinpath(
    "local_data", "all-domestic-certificates"
)
|
|
|
|
|
|
# Wall constructions for which the resulting energy-efficiency category is collected.
_INSULATED_WALL_DESCRIPTIONS = (
    "Cavity wall, filled cavity",
    "Solid brick, with internal insulation",
    "Solid brick, with external insulation",
)

# Main-heating description used to pick out air source heat pump certificates.
_ASHP_DESCRIPTION = "Air source heat pump, radiators, electric"

# Heating-control descriptions retained in the heat pump aggregation.
_ASHP_CONTROL_DESCRIPTIONS = (
    "Programmer, TRVs and bypass",
    "Time and temperature zone control",
)


def _recent_with_uprn(frame, date_cutoff):
    """Return the rows of *frame* that have a UPRN and were lodged on or after *date_cutoff*."""
    frame = frame[frame["uprn"].notna()]
    # Dates are parsed only for the rows that survived the earlier filters.
    return frame[pd.to_datetime(frame["lodgement-date"]) >= date_cutoff]


def _most_common_wall_efficiency(walls_df, description):
    """Return, per construction age band, the most frequent wall efficiency for *description*."""
    matching = walls_df[walls_df["walls-description"] == description]
    counts = (
        matching.groupby(["construction-age-band", "walls-energy-eff"])
        .size()
        # Name the count column explicitly instead of relying on the implicit ``0``.
        .reset_index(name="count")
    )
    # For each age band keep the most populous efficiency category. A stable
    # sort keeps the grouped (sorted-by-key) order among equal counts, so ties
    # always resolve the same way.
    return counts.sort_values(
        "count", ascending=False, kind="mergesort"
    ).drop_duplicates("construction-age-band")


def _aggregate_ashp(ashp_df):
    """Count heat pump certificates per age band / heating description / efficiency combination."""
    counts = (
        ashp_df.groupby(
            [
                "construction-age-band",
                "mainheat-description",
                "mainheatcont-description",
                "mainheat-energy-eff",
                "mainheatc-energy-eff",
            ]
        )
        .size()
        .reset_index(name="count")
    )
    return counts[counts["mainheatcont-description"].isin(_ASHP_CONTROL_DESCRIPTIONS)]


def app():
    """Collect typical energy-efficiency categories from locally stored EPC data.

    For EPCs lodged from 2020 onwards, this collects data on the energy
    efficiency categories for wall insulation so that when we simulate, we know
    what the resulting energy efficiency category will be. The same certificates
    are also aggregated for air source heat pumps.

    Every sub-directory of ``EPC_DIRECTORY`` is expected to contain a
    ``certificates.csv`` file.

    Returns:
        A dictionary with two entries:

        * ``"walls"`` maps each insulated wall description to a list of records
          (``construction-age-band``, ``walls-energy-eff``, ``count``), one per
          age band, holding the most common efficiency category.
        * ``"ashp"`` is a list of records counting heat pump certificates per
          combination of age band, heating/control description and efficiency.

    Raises:
        ValueError: If ``EPC_DIRECTORY`` contains no sub-directories.
    """
    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
    if not epc_directories:
        # pd.concat would otherwise fail later with an unhelpful message.
        raise ValueError(f"No certificate directories found under {EPC_DIRECTORY}")

    date_cutoff = "2020-01-01"
    walls_data = []
    ashp_data = []
    for directory in tqdm(epc_directories):
        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
        # Rename the columns to the same format as the api returns
        data.columns = [c.replace("_", "-").lower() for c in data.columns]

        insulated_walls = data[
            data["walls-description"].isin(_INSULATED_WALL_DESCRIPTIONS)
        ]
        walls_data.append(_recent_with_uprn(insulated_walls, date_cutoff))

        ashp = data[data["mainheat-description"] == _ASHP_DESCRIPTION]
        ashp_data.append(_recent_with_uprn(ashp, date_cutoff))

    walls_df = pd.concat(walls_data)
    ashp_df = pd.concat(ashp_data)

    # Since these tables are small, we just convert them to python dictionaries.
    # This data is just held in the wall_energy_efficiency_values script, rather than s3
    return {
        "walls": {
            description: _most_common_wall_efficiency(walls_df, description).to_dict(
                "records"
            )
            for description in _INSULATED_WALL_DESCRIPTIONS
        },
        "ashp": _aggregate_ashp(ashp_df).to_dict("records"),
    }