Added starting sap and starting heat demand

2026-08-02 21:08:24 +00:00 · 2023-09-06 17:17:29 +01:00 · 2023-09-06 17:17:29 +01:00 · 235d85d5bd
commit 235d85d5bd
parent e516a6ac41
1 changed files with 22 additions and 14 deletions
--- a/model_data/simulation_system/generate_rdsap_change.py
+++ b/model_data/simulation_system/generate_rdsap_change.py
@ -1,4 +1,3 @@
-import numpy as np
 import pandas as pd
 from tqdm import tqdm

@ -31,6 +30,7 @@ def app():

    # TODO [x] : Does energy tariff make a difference
    #           - leave for now but it may not
+    # TODO: [x] : Add starting SAP and heat demand as a feature
    # TODO [x] : If SAP hasn't changed, we don't include the record
    # TODO [x]: If SAP gets worse, in the vast majority of cases the building genuinely looks
    #           worse in the newer EPC, so we can switch the order of the two records
@ -53,6 +53,7 @@ def app():
        df = data_processor.pre_process()
        cleaning_averages = data_processor.make_cleaning_averages()

+        data_by_urpn = []
        for uprn, property_data in df.groupby("UPRN", observed=True):

            # Fixed features - these are property attributes that shouldn't change over time
@ -85,8 +86,7 @@ def app():
            # We include the lodgement date here as we probably need to factor time into the
            # model, since EPC standards and rigour have changed over time
            variable_data = modified_property_data[
-                COMPONENT_FEATURES
-                + ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE]
+                COMPONENT_FEATURES + ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE]
                ]

            # Note: we look at changes between subsequent EPCs; however, we could look at other permutations
@ -104,11 +104,15 @@ def app():
                gets_better = earliest_record[RDSAP_RESPONSE] <= latest_record[RDSAP_RESPONSE]

                if gets_better:
-                    rdsap_change = latest_record[RDSAP_RESPONSE] - earliest_record[RDSAP_RESPONSE]
-                    heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - earliest_record[HEAT_DEMAND_RESPONSE]
+                    starting_sap = earliest_record[RDSAP_RESPONSE]
+                    starting_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
+                    rdsap_change = latest_record[RDSAP_RESPONSE] - starting_sap
+                    heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
                else:
-                    rdsap_change = earliest_record[RDSAP_RESPONSE] - latest_record[RDSAP_RESPONSE]
-                    heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - latest_record[HEAT_DEMAND_RESPONSE]
+                    starting_sap = latest_record[RDSAP_RESPONSE]
+                    starting_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
+                    rdsap_change = earliest_record[RDSAP_RESPONSE] - starting_sap
+                    heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand

                if rdsap_change == 0:
                    continue
@ -127,24 +131,28 @@ def app():
                        "UPRN": uprn,
                        "RDSAP_CHANGE": rdsap_change,
                        "HEAT_DEMAND_CHANGE": heat_demand_change,
+                        "STARTING_SAP": starting_sap,
+                        "STARTING_HEAT_DEMAND": starting_heat_demand,
                        **fixed_data,
                        **features.to_dict(),
                    }
                )

-            property_model_df = pd.DataFrame(property_model_data)
-            # Add some temporal features - we look at the days from the standard starting point in time
-            # for the starting and ending date so all records are from a fixed point
-            # TODO: implement me
-            property_model_df["DAYS_TO_STARTING"] = None
-            property_model_df["DAYS_TO_ENDING"] = None
+            data_by_urpn.extend(property_model_data)

-            dataset.append(property_model_df)
+        data_by_urpn_df = pd.DataFrame(data_by_urpn)
+        # Add some temporal features: the number of days from a fixed reference date to the
+        # starting and ending dates, so that all records are measured from the same point in time
+        # TODO: implement me
+        data_by_urpn_df["DAYS_TO_STARTING"] = None
+        data_by_urpn_df["DAYS_TO_ENDING"] = None

        # TODO: We need to pre-process the data. For instance, rather than using static for roofs, walls and
        #       floors, we may want to use the U-value. We may also want to handle the (assumed) tags
        #       within descriptions

+        dataset.append(data_by_urpn_df)
+
        cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0]
        cleaning_dataset.append(cleaning_averages)