diff --git a/etl/customers/stonewater/shdf_3_clustering.py b/etl/customers/stonewater/shdf_3_clustering.py
index 0704d64d..93797db0 100644
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
@@ -1992,6 +1992,13 @@ def updated_version():
 
     priority_postcodes, previous_waves_address_id, master_sheet = read_stonewater_asset_data()
 
+    # Pull in the EPC data
+    epc_data = read_epc_data(uprn_lookup_2)
+
+    ########################################################################
+    # Prepare the data
+    ########################################################################
+
     # Filter the asset list down to the priority postcodes
     asset_list["is_priority_postcode"] = asset_list["postcode"].isin(priority_postcodes)
 
@@ -2012,8 +2019,17 @@ def updated_version():
         right_on="Address ID"
     )
 
+    asset_list = asset_list.merge(
+        epc_data[["internal_id", "current-energy-efficiency", "lodgement-date", "estimated"]],
+        how="left",
+        on="internal_id"
+    )
+    asset_list["days_since_lodgement_epc"] = (
+        datetime.now() - pd.to_datetime(asset_list["lodgement-date"], errors="coerce", dayfirst=True)
+    ).dt.days
+
     # Flag properties that were surveyed within the last 5 years
-    asset_list["epc_within_5_years"] = asset_list["days_since_lodgement"] < 5 * 365
+    asset_list["epc_within_5_years"] = asset_list["days_since_lodgement_epc"] < 5 * 365
 
     # Identify properties where they've had an EPC done within the last 5 years, where the SAP rating is already
     # a EPC C. Alternatively, any property with an EPC rating of 80 or above is also considered, regardless of when
@@ -2027,9 +2043,9 @@ def updated_version():
         asset_list["is_priority_postcode"] & ~asset_list["In Osmosis Wave 2.1"] & ~asset_list["is_epc_c_or_above"]
         ][
         [
-            "internal_id", "customer_asset_id", "postcode", "house_number", "address1", "address2", "city_town",
-            "county", "external_address_id", "owner", "days_since_lodgement", "Lodgement Date", "epc_within_5_years",
-            "EPC Rating"
+            "internal_id", "uprn", "udprn", "customer_asset_id", "postcode", "house_number", "address1", "address2",
+            "city_town", "county", "external_address_id", "owner", "days_since_lodgement", "Lodgement Date",
+            "epc_within_5_years", "EPC Rating", "estimated", "current-energy-efficiency", "lodgement-date"
         ]
     ]
 
@@ -2043,7 +2059,22 @@ def updated_version():
         right_on="Address ID"
     )
 
-    # Pull in the EPC data
+    # For SAP, use the most recent EPC if epc_within_5_years is True; otherwise use the parity-modelled SAP
+    clustering_features["current-energy-efficiency"] = clustering_features["current-energy-efficiency"].astype(float)
+    clustering_features["representative_sap"] = np.where(
+        clustering_features["epc_within_5_years"],
+        clustering_features["current-energy-efficiency"],
+        clustering_features["parity_modelled_sap"]
+    )
+
+    # incorrect_epcs = clustering_features[
+    #     clustering_features["EPC Rating"] != clustering_features["current-energy-efficiency"]]
+    # incorrect_epcs = incorrect_epcs[
+    #     ~pd.isnull(incorrect_epcs["current-energy-efficiency"]) & pd.isnull(incorrect_epcs["estimated"])
+    #     ]
+    # incorrect_epcs = incorrect_epcs.rename(columns={"current-energy-efficiency": "Current SAP Rating"})
+    # # Store data
+    # incorrect_epcs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Incorrect EPCs.csv", index=False)
 
 
 def read_asset_list():
@@ -2260,6 +2291,7 @@ def read_epc_data(uprn_lookup_2):
         s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
         bucket_name="retrofit-data-dev"
     )
+    epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
 
     complete_epcs = pd.concat([epc_data, epc_data_batch_2])