diff --git a/model_data/simulation_system/generate_rdsap_change.py b/model_data/simulation_system/generate_rdsap_change.py
index 2d691d62..53107df0 100644
--- a/model_data/simulation_system/generate_rdsap_change.py
+++ b/model_data/simulation_system/generate_rdsap_change.py
@@ -1,4 +1,3 @@
-import numpy as np
 import pandas as pd
 from tqdm import tqdm
 
@@ -31,6 +30,7 @@ def app():
 
     # TODO [x] : Does energy tariff make a difference
     #           - leave for now but it may not
+    # TODO [x] : Add starting SAP and heat demand as a feature
     # TODO [x] : If SAP hasn't changed, we don't include the record
     # TODO [x]: If SAP gets worse, it genuinely looks like in the vast majority of cases that the building looks
     #           worse in the newer epc, so we can switch the orders
@@ -53,6 +53,7 @@ def app():
         df = data_processor.pre_process()
         cleaning_averages = data_processor.make_cleaning_averages()
 
+        data_by_urpn = []
         for uprn, property_data in df.groupby("UPRN", observed=True):
 
             # Fixed features - these are property attributes that shouldn't change over time
@@ -85,8 +86,7 @@ def app():
             # We include the lodgement date here as we probably need to factor time into the
             # model, since EPC standards and rigour have changed over time
             variable_data = modified_property_data[
-                COMPONENT_FEATURES
-                + ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE]
+                COMPONENT_FEATURES + ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE]
                 ]
 
             # Note: we look at changes between subsequent EPCS, however we could look at other permutations
@@ -104,11 +104,15 @@ def app():
                 gets_better = earliest_record[RDSAP_RESPONSE] <= latest_record[RDSAP_RESPONSE]
 
                 if gets_better:
-                    rdsap_change = latest_record[RDSAP_RESPONSE] - earliest_record[RDSAP_RESPONSE]
-                    heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - earliest_record[HEAT_DEMAND_RESPONSE]
+                    starting_sap = earliest_record[RDSAP_RESPONSE]
+                    starting_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
+                    rdsap_change = latest_record[RDSAP_RESPONSE] - starting_sap
+                    heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
                 else:
-                    rdsap_change = earliest_record[RDSAP_RESPONSE] - latest_record[RDSAP_RESPONSE]
-                    heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - latest_record[HEAT_DEMAND_RESPONSE]
+                    starting_sap = latest_record[RDSAP_RESPONSE]
+                    starting_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
+                    rdsap_change = earliest_record[RDSAP_RESPONSE] - starting_sap
+                    heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
 
                 if rdsap_change == 0:
                     continue
@@ -127,24 +131,28 @@ def app():
                         "UPRN": uprn,
                         "RDSAP_CHANGE": rdsap_change,
                         "HEAT_DEMAND_CHANGE": heat_demand_change,
+                        "STARTING_SAP": starting_sap,
+                        "STARTING_HEAT_DEMAND": starting_heat_demand,
                         **fixed_data,
                         **features.to_dict(),
                     }
                 )
 
-            property_model_df = pd.DataFrame(property_model_data)
-            # Add some temporal features - we look at the days from the standard starting point in time
-            # for the starting and ending date so all records are from a fixed point
-            # TODO: implement me
-            property_model_df["DAYS_TO_STARTING"] = None
-            property_model_df["DAYS_TO_ENDING"] = None
+            data_by_urpn.extend(property_model_data)
 
-            dataset.append(property_model_df)
+        data_by_urpn_df = pd.DataFrame(data_by_urpn)
+        # Add some temporal features - we measure the days from a standard reference point in time
+        # to the starting and ending dates, so all records are relative to the same fixed point
+        # TODO: implement me
+        data_by_urpn_df["DAYS_TO_STARTING"] = None
+        data_by_urpn_df["DAYS_TO_ENDING"] = None
 
         # TODO: We need to pre-process the data. For instance, rather than using static for roofs, walls and
         #       floors, we may want to use the U-value. We may also want to handle the (assumed) tags
         #       within descriptions
 
+        dataset.append(data_by_urpn_df)
+
         cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0]
         cleaning_dataset.append(cleaning_averages)