Adding postcode summary to stonewater

2026-07-27 23:35:01 +00:00 · 2024-11-12 15:49:28 +00:00 · 2024-11-12 15:49:28 +00:00 · dfa37f86d4
commit dfa37f86d4
parent fe6e83314f
1 changed files with 62 additions and 17 deletions
--- a/etl/customers/stonewater/Wave
+++ b/etl/customers/stonewater/Wave
@ -916,13 +916,14 @@ def main():
        "18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX',
        '3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX',
        '16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ',
-        '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX'
+        '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX',
+        '7 Croft Street': 'StonewaterSurveys_8/333-2-7 Croft Street-HR6 8LA'
    }

    # We now match this retrofit packages board to the extracted data
    matching_lookup = []
    for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
-        
+
        # Handle the case that has the wrong postcode in the asset data
        if home["Name"] in manual_filters:
            filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy()
@ -986,11 +987,11 @@ def main():
    missing_ids = list(missing_ids)
    if missing_ids:
        # We check that the missing ids have no data yet
-        missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)]
-        missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
-            CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
+        # missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)]
+        # missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
+        #     CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")

-        if len(missing_ids) != 8:
+        if len(missing_ids) != 6:
            raise Exception("Unacceptable number of missings")

    if matching_lookup["Address ID"].duplicated().sum():
@ -1083,12 +1084,20 @@ def main():
    stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"])
    windows_data["Address ID"] = windows_data["Address ID"].astype(float)
    stonewater_data = stonewater_data.merge(windows_data, on="Address ID", how="left")
+    stonewater_data = stonewater_data.sort_values("Archetype ID", ascending=True)

    if stonewater_data["Address ID"].duplicated().sum():
        raise Exception("Duplicate Address IDs")

+    for c in [
+        'Window attributes - Fitted/renewed date',
+        'Parent Asset Window attributes - Fitted/renewed date',
+        'Fitted/renewed date'
+    ]:
+        stonewater_data[c] = stonewater_data[c].astype(str)
+
    # Save this data to excel
-    stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages.xlsx", index=False)
+    stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V2.xlsx", index=False)

    cost_sheet = [
        {
@ -1173,7 +1182,7 @@ def main():

    create_proposed_wave_3_bid(
        costed_packages_filepath=os.path.join(
-            CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) MR Review v1.xlsx"
+            CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) Single Model V3.xlsx"
        ),
        archetypes_sheet_filepath=os.path.join(
            CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
@ -1183,8 +1192,8 @@ def main():

 def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepath):
    # We read in the costed packages
-    # Note: Header as 12 is for Matt Ratcliff's reviewed version
    costed_packages = pd.read_excel(costed_packages_filepath, header=13, sheet_name="Modelled Packages")
+    costed_packages = costed_packages[~pd.isnull(costed_packages["Address"])]

    archetypes_to_cost = costed_packages[
        [
@ -1213,16 +1222,11 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
                 'Existing Primary Heating System',
                 'Existing Primary Heating PCDF Reference'])

-    # We take properties that are EPC D and below (61% of units)
+    # We take properties that are EPC D and below (59% of units)
    archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])]

    archetypes_to_cost["Has been modelled"] = ~pd.isnull(archetypes_to_cost["Modelled SAP Band"])

-    average_cost = archetypes_to_cost[
-        archetypes_to_cost["Has been modelled"]
-    ]['Total Cost of Measures inc Contingency'].mean()
-    print(average_cost)
-
    # These are the Archetypes that will likely be suitable for Wave 3
    archetypes_sheet = pd.read_excel(archetypes_sheet_filepath, header=4)
    archetypes_sheet = archetypes_sheet[~pd.isnull(archetypes_sheet["Address ID"])]
@ -1236,7 +1240,21 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
        how="left"
    )

-    proposed_sample = archetypes_sheet[archetypes_sheet["Archetype ID"].isin(archetypes_to_cost["Archetype ID"])]
+    proposed_sample = archetypes_sheet[
+        archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
+    ]
+
+    not_proposed = archetypes_sheet[
+        ~archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str))
+    ]
+
+    # archetypes_without_survey = []
+    # for p in list(set(not_proposed)):
+    #     filtered = costed_packages[costed_packages["Archetype ID"].astype(int).astype(str) == p]
+    #     if filtered.empty:
+    #         archetypes_without_survey.append(p)
+
+    # Can we propose anything about archetypes that were not surveyed?

    proposed_sample = proposed_sample[
        [
@ -1247,6 +1265,8 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa

    # We classify into high and low confidence

+    archetypes_to_cost["Surveyed Main Roof"] = archetypes_to_cost["Surveyed Main Roof"].fillna("")
+
    match_classification = []
    for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)):

@ -1331,8 +1351,33 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
        None, proposed_sample["Total Cost of Measures inc Contingency"]
    )

+    proposed_sample = proposed_sample.sort_values("Archetype ID", ascending=True)
+
    # Save excel
-    proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid (WIP).xlsx", index=False)
+    proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid V2 (WIP).xlsx", index=False)
+
+    # For each postcode that's in the bid, we also summarise the number of units in the bid and number left out
+    proposed_sample_postcodes = proposed_sample["Postcode"].unique()
+
+    postcode_summary = []
+    for postcode in proposed_sample_postcodes:
+        in_proposal = proposed_sample[proposed_sample["Postcode"] == postcode]
+        not_in_proposal = not_proposed[not_proposed["Postcode"] == postcode]
+        postcode_summary.append(
+            {
+                "Postcode": postcode,
+                "Number of properties in Proposal": len(in_proposal),
+                "Number of properties not in Proposal": len(not_in_proposal)
+            }
+        )
+    postcode_summary = pd.DataFrame(postcode_summary)
+    postcode_summary = postcode_summary.sort_values(
+        "Number of properties not in Proposal",
+        ascending=False).reset_index(drop=True)
+
+    postcode_summary.to_excel(
+        CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid Postcode Summary.xlsx", index=False
+    )


 def find_remaining_surveys():