Added starting SAP and starting heat demand

This commit is contained in:
Khalim Conn-Kowlessar 2023-09-06 17:17:29 +01:00
parent e516a6ac41
commit 235d85d5bd

View file

@ -1,4 +1,3 @@
import numpy as np
import pandas as pd
from tqdm import tqdm
@ -31,6 +30,7 @@ def app():
# TODO [x] : Does energy tariff make a difference
# - leave for now but it may not
# TODO: [x] : Add starting SAP and heat demand as a feature
# TODO [x] : If SAP hasn't changed, we don't include the record
# TODO [x]: If SAP gets worse, in the vast majority of cases the building genuinely looks
# worse in the newer EPC, so we can switch the order of the two records
@ -53,6 +53,7 @@ def app():
df = data_processor.pre_process()
cleaning_averages = data_processor.make_cleaning_averages()
data_by_urpn = []
for uprn, property_data in df.groupby("UPRN", observed=True):
# Fixed features - these are property attributes that shouldn't change over time
@ -85,8 +86,7 @@ def app():
# We include the lodgement date here as we probably need to factor time into the
# model, since EPC standards and rigour have changed over time
variable_data = modified_property_data[
COMPONENT_FEATURES
+ ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE]
COMPONENT_FEATURES + ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE]
]
# Note: we look at changes between subsequent EPCs; however, we could look at other permutations
@ -104,11 +104,15 @@ def app():
gets_better = earliest_record[RDSAP_RESPONSE] <= latest_record[RDSAP_RESPONSE]
if gets_better:
rdsap_change = latest_record[RDSAP_RESPONSE] - earliest_record[RDSAP_RESPONSE]
heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - earliest_record[HEAT_DEMAND_RESPONSE]
starting_sap = earliest_record[RDSAP_RESPONSE]
starting_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
rdsap_change = latest_record[RDSAP_RESPONSE] - starting_sap
heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
else:
rdsap_change = earliest_record[RDSAP_RESPONSE] - latest_record[RDSAP_RESPONSE]
heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - latest_record[HEAT_DEMAND_RESPONSE]
starting_sap = latest_record[RDSAP_RESPONSE]
starting_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
rdsap_change = earliest_record[RDSAP_RESPONSE] - starting_sap
heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
if rdsap_change == 0:
continue
@ -127,24 +131,28 @@ def app():
"UPRN": uprn,
"RDSAP_CHANGE": rdsap_change,
"HEAT_DEMAND_CHANGE": heat_demand_change,
"STARTING_SAP": starting_sap,
"STARTING_HEAT_DEMAND": starting_heat_demand,
**fixed_data,
**features.to_dict(),
}
)
property_model_df = pd.DataFrame(property_model_data)
# Add some temporal features - we look at the days from the standard starting point in time
# for the starting and ending date so all records are from a fixed point
# TODO: implement me
property_model_df["DAYS_TO_STARTING"] = None
property_model_df["DAYS_TO_ENDING"] = None
data_by_urpn.extend(property_model_data)
dataset.append(property_model_df)
data_by_urpn_df = pd.DataFrame(data_by_urpn)
# Add some temporal features - we look at the days from the standard starting point in time
# for the starting and ending date so all records are from a fixed point
# TODO: implement me
data_by_urpn_df["DAYS_TO_STARTING"] = None
data_by_urpn_df["DAYS_TO_ENDING"] = None
# TODO: We need to pre-process the data. For instance, rather than using static for roofs, walls and
# floors, we may want to use the U-value. We may also want to handle the (assumed) tags
# within descriptions
dataset.append(data_by_urpn_df)
cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0]
cleaning_dataset.append(cleaning_averages)