mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
124 lines
5 KiB
Python
124 lines
5 KiB
Python
import pandas as pd
|
|
from tqdm import tqdm
|
|
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
|
|
from utils.logger import setup_logger
|
|
from etl.epc.settings import EARLIEST_EPC_DATE
|
|
|
|
# Module-level logger; create_dataset reports its progress through it.
logger = setup_logger()
|
|
|
|
|
|
class AirSourceHeatPumpEfficiency:
    """Summarise heating-system efficiency ratings found in EPC certificate exports.

    Each directory in ``file_directories`` is expected to contain a
    ``certificates.csv`` file. The certificates are filtered down to air source
    heat pump and high heat retention storage heater records and then
    aggregated into count tables.
    """

    # Exact EPC description strings the filters match on.
    _ASHP_DESCRIPTION = "Air source heat pump, radiators, electric"
    _ASHP_CONTROL = "Time and temperature zone control"
    _HHR_CONTROL = "Controls for high heat retention storage heaters"

    # Rows missing any of these columns are discarded.
    _REQUIRED_COLUMNS = (
        "PROPERTY_TYPE",
        "BUILT_FORM",
        "CONSTRUCTION_AGE_BAND",
        "TOTAL_FLOOR_AREA",
    )

    # Columns that together identify one heating-system combination.
    _SYSTEM_COLUMNS = (
        "PROPERTY_TYPE",
        "BUILT_FORM",
        "MAINHEAT_DESCRIPTION",
        "MAINHEAT_ENERGY_EFF",
        "MAINHEATCONT_DESCRIPTION",
        "MAINHEATC_ENERGY_EFF",
        "MAIN_FUEL",
        "HOTWATER_DESCRIPTION",
        "HOT_WATER_ENERGY_EFF",
        "MAINS_GAS_FLAG",
    )

    def __init__(self, file_directories, cleaned_lookup):
        """
        :param file_directories: A list of directories where files are stored.
        :param cleaned_lookup: A dictionary containing cleaned lookup data.
        """
        self.file_directories = file_directories
        self.cleaned_lookup = cleaned_lookup

        self.results = []

    @classmethod
    def _select_heating_rows(cls, df, earliest_date):
        """Return the rows of one certificates frame that feed the analysis.

        The input frame is not modified.

        :param df: Raw certificates frame as read from ``certificates.csv``.
        :param earliest_date: Only rows lodged strictly after this date are kept.
        :return: Filtered frame with ``LODGEMENT_DATE`` parsed to datetimes.
        """
        df = df.assign(LODGEMENT_DATE=pd.to_datetime(df["LODGEMENT_DATE"]))

        # Take entries after SAP12
        df = df[df["LODGEMENT_DATE"] > earliest_date]

        # Air source heat pumps with zone control ...
        is_ashp = (df["MAINHEAT_DESCRIPTION"] == cls._ASHP_DESCRIPTION) & (
            df["MAINHEATCONT_DESCRIPTION"] == cls._ASHP_CONTROL
        )
        # ... or high heat retention storage heaters.
        is_high_heat_retention = df["MAINHEATCONT_DESCRIPTION"] == cls._HHR_CONTROL
        df = df[is_ashp | is_high_heat_retention]

        # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM,
        # CONSTRUCTION_AGE_BAND or TOTAL_FLOOR_AREA.
        return df.dropna(subset=list(cls._REQUIRED_COLUMNS))

    @classmethod
    def _count_ashp_efficiencies(cls, heating_df):
        """Count air source heat pump certificates per age band and efficiency rating.

        Only certificates lodged on or after 2019-01-01 with a usable
        construction age band are counted.

        :param heating_df: Filtered certificates including the
            ``CONSTRUCTION_AGE_BAND_CLEAN`` column.
        :return: Frame with one row per (``CONSTRUCTION_AGE_BAND_CLEAN``,
            ``MAINHEAT_ENERGY_EFF``) pair; the ``LMK_KEY`` column holds the count.
        """
        ashp_df = heating_df[
            (heating_df["MAINHEAT_DESCRIPTION"] == cls._ASHP_DESCRIPTION)
            & (~heating_df["CONSTRUCTION_AGE_BAND"].isin(["NO DATA!", "INVALID!"]))
            & (heating_df["LODGEMENT_DATE"] >= pd.to_datetime("2019-01-01"))
        ]
        return (
            ashp_df.groupby(["CONSTRUCTION_AGE_BAND_CLEAN", "MAINHEAT_ENERGY_EFF"])["LMK_KEY"]
            .count()
            .reset_index()
        )

    @classmethod
    def _count_system_combinations(cls, heating_df):
        """Count certificates for every distinct heating-system combination.

        :param heating_df: Filtered certificates.
        :return: Frame with the ``_SYSTEM_COLUMNS`` columns plus a ``count`` column.
        """
        return heating_df.groupby(list(cls._SYSTEM_COLUMNS)).size().reset_index(name="count")

    def create_dataset(self):
        """Read every ``certificates.csv`` and build the efficiency count tables.

        :return: Tuple ``(ashp_efficiencies, all_counts_agg)`` as produced by
            ``_count_ashp_efficiencies`` and ``_count_system_combinations``.
        :raises ValueError: Raised by ``pd.concat`` when ``file_directories`` is empty.
        """
        logger.info("Creating air source heat pump efficiency dataset")

        heating_data = []
        for directory in tqdm(self.file_directories):
            filepath = directory / "certificates.csv"
            raw_df = pd.read_csv(filepath, low_memory=False)
            heating_data.append(self._select_heating_rows(raw_df, EARLIEST_EPC_DATE))

        heating_df = pd.concat(heating_data)

        # Clean construction age band.
        # Imported here as in the original code (presumably to avoid a circular
        # import -- confirm before moving it to module level).
        from etl.epc.DataProcessor import EPCDataProcessor

        heating_df["CONSTRUCTION_AGE_BAND_CLEAN"] = heating_df["CONSTRUCTION_AGE_BAND"].apply(
            EPCDataProcessor.clean_construction_age_band
        )

        ashp_efficiencies = self._count_ashp_efficiencies(heating_df)

        # NOTE(review): the previous code aggregated an undefined ``all_counts``
        # frame (and evaluated an undefined ``ashp_efficiency_agg``), so this
        # method always failed with NameError. The per-combination counts are now
        # derived directly from the filtered certificates -- confirm that this is
        # the intended source.
        all_counts_agg = self._count_system_combinations(heating_df)

        # Findings recorded during earlier exploration (not re-verified here):
        # - In houses, 68% of the cases where we see air source heat pumps are in
        #   detached and semi-detached houses.
        # - In bungalows, 74% of the cases where we see air source heat pumps are
        #   in detached and semi-detached properties.
        # TODO: Research options for mid and end-terrace houses
        # TODO: Research the options for flats - we see them appear in flats, but
        #  practically speaking, how does the install process work?

        return ashp_efficiencies, all_counts_agg