From dfa37f86d469d4ee926ee0dc2438629fb35e17cc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 12 Nov 2024 15:49:28 +0000
Subject: [PATCH] Adding postcode summary to stonewater

---
 .../stonewater/Wave 3 Preparation.py          | 79 +++++++++++++++----
 1 file changed, 62 insertions(+), 17 deletions(-)

diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py
index 0036a0a4..889d8f88 100644
--- a/etl/customers/stonewater/Wave 3 Preparation.py	
+++ b/etl/customers/stonewater/Wave 3 Preparation.py	
@@ -916,13 +916,14 @@ def main():
         "18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX',
         '3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX',
         '16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ',
-        '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX'
+        '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX',
+        '7 Croft Street': 'StonewaterSurveys_8/333-2-7 Croft Street-HR6 8LA'
     }
 
     # We now match this retrofit packages board to the extracted data
     matching_lookup = []
     for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
-        
+
         # Handle the case that has the wrong postcode in the asset data
         if home["Name"] in manual_filters:
             filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy()
@@ -986,11 +987,11 @@ def main():
     missing_ids = list(missing_ids)
     if missing_ids:
         # We check that the missing ids have no data yet
-        missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)]
-        missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
-            CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
+        # missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)]
+        # missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
+        #     CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
 
-        if len(missing_ids) != 8:
+        if len(missing_ids) != 6:
             raise Exception("Unacceptable number of missings")
 
     if matching_lookup["Address ID"].duplicated().sum():
@@ -1083,12 +1084,20 @@ def main():
     stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"])
     windows_data["Address ID"] = windows_data["Address ID"].astype(float)
     stonewater_data = stonewater_data.merge(windows_data, on="Address ID", how="left")
+    stonewater_data = stonewater_data.sort_values("Archetype ID", ascending=True)
 
     if stonewater_data["Address ID"].duplicated().sum():
         raise Exception("Duplicate Address IDs")
 
+    for c in [
+        'Window attributes - Fitted/renewed date',
+        'Parent Asset Window attributes - Fitted/renewed date',
+        'Fitted/renewed date'
+    ]:
+        stonewater_data[c] = stonewater_data[c].astype(str)
+
     # Save this data to excel
-    stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages.xlsx", index=False)
+    stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V2.xlsx", index=False)
 
     cost_sheet = [
         {
@@ -1173,7 +1182,7 @@ def main():
 
     create_proposed_wave_3_bid(
         costed_packages_filepath=os.path.join(
-            CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) MR Review v1.xlsx"
+            CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) Single Model V3.xlsx"
         ),
         archetypes_sheet_filepath=os.path.join(
             CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
@@ -1183,8 +1192,8 @@ def main():
 
 def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepath):
     # We read in the costed packages
-    # Note: Header as 12 is for Matt Ratcliff's reviewed version
     costed_packages = pd.read_excel(costed_packages_filepath, header=13, sheet_name="Modelled Packages")
+    costed_packages = costed_packages[~pd.isnull(costed_packages["Address"])]
 
     archetypes_to_cost = costed_packages[
         [
@@ -1213,16 +1222,11 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
                  'Existing Primary Heating System',
                  'Existing Primary Heating PCDF Reference'])
 
-    # We take properties that are EPC D and below (61% of units)
+    # We take properties that are EPC D and below (59% of units)
     archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])]
 
     archetypes_to_cost["Has been modelled"] = ~pd.isnull(archetypes_to_cost["Modelled SAP Band"])
 
-    average_cost = archetypes_to_cost[
-        archetypes_to_cost["Has been modelled"]
-    ]['Total Cost of Measures inc Contingency'].mean()
-    print(average_cost)
-
     # These are the Arhetypes that will likely be suitable for Wave 3
     archetypes_sheet = pd.read_excel(archetypes_sheet_filepath, header=4)
     archetypes_sheet = archetypes_sheet[~pd.isnull(archetypes_sheet["Address ID"])]
@@ -1236,7 +1240,21 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
         how="left"
     )
 
-    proposed_sample = archetypes_sheet[archetypes_sheet["Archetype ID"].isin(archetypes_to_cost["Archetype ID"])]
+    proposed_sample = archetypes_sheet[
+        archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
+    ]
+
+    not_proposed = archetypes_sheet[
+        ~archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
+    ]
+
+    # archetypes_without_survey = []
+    # for p in list(set(not_proposed)):
+    #     filtered = costed_packages[costed_packages["Archetype ID"].astype(int).astype(str) == p]
+    #     if filtered.empty:
+    #         archetypes_without_survey.append(p)
+
+    # Can we propose anything about archetypes that were not surveyed?
 
     proposed_sample = proposed_sample[
         [
@@ -1247,6 +1265,8 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
 
     # We classify into high and low confidence
 
+    archetypes_to_cost["Surveyed Main Roof"] = archetypes_to_cost["Surveyed Main Roof"].fillna("")
+
     match_classification = []
     for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)):
 
@@ -1331,8 +1351,33 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
         None, proposed_sample["Total Cost of Measures inc Contingency"]
     )
 
+    proposed_sample = proposed_sample.sort_values("Archetype ID", ascending=True)
+
     # Save excel
-    proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid (WIP).xlsx", index=False)
+    proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid V2 (WIP).xlsx", index=False)
+
+    # For each postcode that's in the bid, we also summarise the number of units in the bid and number left out
+    proposed_sample_postcodes = proposed_sample["Postcode"].unique()
+
+    postcode_summary = []
+    for postcode in proposed_sample_postcodes:
+        in_proposal = proposed_sample[proposed_sample["Postcode"] == postcode]
+        not_in_proposal = not_proposed[not_proposed["Postcode"] == postcode]
+        postcode_summary.append(
+            {
+                "Postcode": postcode,
+                "Number of properties in Proposal": len(in_proposal),
+                "Number of properties not in Proposal": len(not_in_proposal)
+            }
+        )
+    postcode_summary = pd.DataFrame(postcode_summary)
+    postcode_summary = postcode_summary.sort_values(
+        "Number of properties not in Proposal",
+        ascending=False).reset_index(drop=True)
+
+    postcode_summary.to_excel(
+        CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid Postcode Summary.xlsx", index=False
+    )
 
 
 def find_remaining_surveys():