mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Added starting sap and starting heat demand
This commit is contained in:
parent
e516a6ac41
commit
235d85d5bd
1 changed files with 22 additions and 14 deletions
|
|
@ -1,4 +1,3 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
||||
|
|
@ -31,6 +30,7 @@ def app():
|
|||
|
||||
# TODO [x] : Does energy tariff make a difference
|
||||
# - leave for now but it may not
|
||||
# TODO: [x] : Add starting SAP and heat demand as a feature
|
||||
# TODO [x] : If SAP hasn't changed, we don't include the record
|
||||
# TODO [x]: If SAP gets worse, in the vast majority of cases the building genuinely appears to be in a
|
||||
# worse state in the newer EPC, so we can swap the order of the two records
|
||||
|
|
@ -53,6 +53,7 @@ def app():
|
|||
df = data_processor.pre_process()
|
||||
cleaning_averages = data_processor.make_cleaning_averages()
|
||||
|
||||
data_by_urpn = []
|
||||
for uprn, property_data in df.groupby("UPRN", observed=True):
|
||||
|
||||
# Fixed features - these are property attributes that shouldn't change over time
|
||||
|
|
@ -85,8 +86,7 @@ def app():
|
|||
# We include the lodgement date here as we probably need to factor time into the
|
||||
# model, since EPC standards and rigour have changed over time
|
||||
variable_data = modified_property_data[
|
||||
COMPONENT_FEATURES
|
||||
+ ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE]
|
||||
COMPONENT_FEATURES + ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE]
|
||||
]
|
||||
|
||||
# Note: we look at changes between consecutive EPCs; however, we could look at other permutations
|
||||
|
|
@ -104,11 +104,15 @@ def app():
|
|||
gets_better = earliest_record[RDSAP_RESPONSE] <= latest_record[RDSAP_RESPONSE]
|
||||
|
||||
if gets_better:
|
||||
rdsap_change = latest_record[RDSAP_RESPONSE] - earliest_record[RDSAP_RESPONSE]
|
||||
heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - earliest_record[HEAT_DEMAND_RESPONSE]
|
||||
starting_sap = earliest_record[RDSAP_RESPONSE]
|
||||
starting_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
|
||||
rdsap_change = latest_record[RDSAP_RESPONSE] - starting_sap
|
||||
heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
|
||||
else:
|
||||
rdsap_change = earliest_record[RDSAP_RESPONSE] - latest_record[RDSAP_RESPONSE]
|
||||
heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - latest_record[HEAT_DEMAND_RESPONSE]
|
||||
starting_sap = latest_record[RDSAP_RESPONSE]
|
||||
starting_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
|
||||
rdsap_change = earliest_record[RDSAP_RESPONSE] - starting_sap
|
||||
heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
|
||||
|
||||
if rdsap_change == 0:
|
||||
continue
|
||||
|
|
@ -127,24 +131,28 @@ def app():
|
|||
"UPRN": uprn,
|
||||
"RDSAP_CHANGE": rdsap_change,
|
||||
"HEAT_DEMAND_CHANGE": heat_demand_change,
|
||||
"STARTING_SAP": starting_sap,
|
||||
"STARTING_HEAT_DEMAND": starting_heat_demand,
|
||||
**fixed_data,
|
||||
**features.to_dict(),
|
||||
}
|
||||
)
|
||||
|
||||
property_model_df = pd.DataFrame(property_model_data)
|
||||
# Add some temporal features - the number of days from a standard reference point in time to the
|
||||
# starting and ending dates, so that all records are measured from the same fixed point
|
||||
# TODO: implement me
|
||||
property_model_df["DAYS_TO_STARTING"] = None
|
||||
property_model_df["DAYS_TO_ENDING"] = None
|
||||
data_by_urpn.extend(property_model_data)
|
||||
|
||||
dataset.append(property_model_df)
|
||||
data_by_urpn_df = pd.DataFrame(data_by_urpn)
|
||||
# Add some temporal features - the number of days from a standard reference point in time to the
|
||||
# starting and ending dates, so that all records are measured from the same fixed point
|
||||
# TODO: implement me
|
||||
data_by_urpn_df["DAYS_TO_STARTING"] = None
|
||||
data_by_urpn_df["DAYS_TO_ENDING"] = None
|
||||
|
||||
# TODO: We need to pre-process the data. For instance, rather than using static for roofs, walls and
|
||||
# floors, we may want to use the U-value. We may also want to handle the (assumed) tags
|
||||
# within descriptions
|
||||
|
||||
dataset.append(data_by_urpn_df)
|
||||
|
||||
cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0]
|
||||
cleaning_dataset.append(cleaning_averages)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue