From 7885467fa40240a2a2632b4b6e120cce5a047c61 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 4 Feb 2025 14:35:24 +0000
Subject: [PATCH] Flag surveyed properties, add match columns and export priority list to Excel

---
 .../stonewater/Wave 3 Preparation.py          | 42 +++++++++++++++++--
 1 file changed, 39 insertions(+), 3 deletions(-)
diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py
index 1748f624..fcde164e 100644
--- a/etl/customers/stonewater/Wave 3 Preparation.py	
+++ b/etl/customers/stonewater/Wave 3 Preparation.py	
@@ -3777,6 +3777,21 @@ def revised_model():
     no_match = []
     matches = []
     for _, home in tqdm(new_priority_postcodes.iterrows(), total=len(new_priority_postcodes)):
+
+        # Check whether this home was itself surveyed (i.e. it appears in coordinated_packages)
+        survey_result = coordinated_packages[
+            coordinated_packages["Organisation Reference"] == home["Organisation Reference"]
+            ]
+        if not survey_result.empty:
+            to_extend = [
+                {
+                    "Organisation Reference": home["Organisation Reference"],
+                    "Best Match Organisation Reference": m,
+                    "Was Surveyed": True
+                } for m in survey_result["Organisation Reference"].values
+            ]
+            matches.extend(to_extend)
+
         closest_match = find_nearest_matching_property(coordinated_packages, home)
         if closest_match is None:
             no_match.append(home["Organisation Reference"])
@@ -3785,7 +3800,8 @@ def revised_model():
         to_extend = [
             {
                 "Organisation Reference": home["Organisation Reference"],
-                "Best Match Organisation Reference": m
+                "Best Match Organisation Reference": m,
+                "Was Surveyed": False
             } for m in closest_match["Organisation Reference"].values
         ]
         matches.extend(to_extend)
@@ -4010,7 +4026,8 @@ def revised_model():
         [
             'Organisation Reference', 'Address', 'Postcode', 'Address ID', 'uprn', 'Archetype ID',
             'SAP', 'SAP Band', "Property Type", "Walls", "Roofs", 'Glazing',
-            'Heating', 'Main Fuel', 'Hot Water', 'Estimated SAP Rating', 'Estimated EPC Rating'
+            'Heating', 'Main Fuel', 'Hot Water', 'Number of matches', 'Proportion',
+            'Estimated SAP Rating', 'Estimated EPC Rating'
         ]
     ].rename(
         columns={
@@ -4023,6 +4040,7 @@ def revised_model():
             'Heating': 'Parity - Heating',
             'Main Fuel': 'Parity - Main Fuel',
             'Hot Water': 'Parity - Hot Water',
+            'Proportion': 'Proportion of matched properties with same EPC rating',
         }
     ).merge(
         epc_df[
@@ -4061,7 +4079,25 @@ def revised_model():
         on='Organisation Reference'
     )
 
-    worksheet["Years Since Last EPC"]
+    worksheet["Years Since Last EPC"] = pd.Timestamp.now().year - pd.to_datetime(
+        worksheet["Last EPC - Date Lodged"]).dt.year
+
+    worksheet["Last EPC - uprn"] = worksheet["Last EPC - uprn"].astype("Int64").astype(str)
+
+    worksheet["uprn"] = np.where(
+        pd.isnull(worksheet["uprn"]) & pd.notnull(worksheet["Last EPC - uprn"]),
+        worksheet["Last EPC - uprn"],
+        worksheet["uprn"]
+    )
+
+    worksheet["uprn"] = worksheet["uprn"].replace("<NA>", "")
+
+    # Save to Excel with multiple sheets
+    excel_path = os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "04022025 Stonewater Priority List.xlsx")
+    with pd.ExcelWriter(excel_path, engine="xlsxwriter") as writer:
+        worksheet.to_excel(writer, sheet_name="Worksheet", index=False, header=True)
+        mapped_lookup.to_excel(writer, sheet_name="Lookup Table", index=False, header=True)
+        output_coordination_sheet.to_excel(writer, sheet_name="Coordination", index=False, header=True)
 
 # if __name__ == "__main__":
 #     main()