tidying up stonewater work

2026-07-27 23:35:01 +00:00 · 2025-02-03 12:54:57 +00:00 · 2025-02-03 12:54:57 +00:00 · 01a5077c17
commit 01a5077c17
parent f6d8688698
1 changed files with 221 additions and 3 deletions
--- a/etl/customers/stonewater/Wave
+++ b/etl/customers/stonewater/Wave
@ -1,4 +1,6 @@
 import os
+from pyexpat import features
+
 import PyPDF2
 import re
 import pandas as pd
@ -1704,7 +1706,6 @@ def append_stonewater_id():
    )
    model_proposed_sample = model_proposed_sample[~pd.isnull(model_proposed_sample["Address ID"])]
    model_proposed_sample["Address ID"] = model_proposed_sample["Address ID"].astype(int)
-    z = model_proposed_sample["Archetype ID"].drop_duplicates().sort_values()

    original_archetypes = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
@ -2942,7 +2943,6 @@ def revised_model():
    """

    # 1) Create the new list of properties
-
    new_priority_postcodes = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 "
        "priority list.xlsx"
@ -3188,7 +3188,13 @@ def revised_model():
            wates_coordination_sheet_abeyance
        ]
    )
-
+    # We correct the Asset ID for 34 Kempster Close
+    wates_coordination["Asset ID"] = np.where(
+        wates_coordination["Name"] == "34 Kempster Close",
+        "12005",
+        wates_coordination["Asset ID"]
+    )
+    
    wates_coordination["folder_path"] = wates_coordination["Sharepoint Folder"].apply(
        lambda x: extract_sharepoint_url(x)
    )
@ -3198,6 +3204,14 @@ def revised_model():
    ############################################################
    # NEW 450 COORDINATED RETROFIT ASSESSMENTS
    #############################################################
+    features = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
+        "master sheet.csv",
+        encoding='latin1'
+    )
+    features["Address ID"] = features["Address ID"].astype(str).astype(int)
+    features_to_merge = features[["Address ID", "Organisation Reference"]]
+
    retrofit_packages_board = pd.read_excel(
        os.path.join(
            CUSTOMER_FOLDER_PATH,
@ -3211,6 +3225,10 @@ def revised_model():
        retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
    ]

+    retrofit_packages_board = retrofit_packages_board.merge(
+        features_to_merge, how="left", on="Address ID"
+    )
+
    manual_filters = {
        "Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD",
        "6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG",
@ -3527,6 +3545,206 @@ def revised_model():
            continue

        raise Exception("No match")
+    wates_matching_lookup = pd.DataFrame(wates_matching_lookup)
+
+    # Merge lookup tables onto the coordination sheets
+    wates_coordination = wates_coordination.merge(
+        wates_matching_lookup, how="left", on="Name"
+    )
+    missed_asset_id = wates_coordination[pd.isnull(wates_coordination["Asset ID_x"])]
+    if not missed_asset_id.empty:
+        # We fill the missing ids
+        missing_lookup = {
+            "4  Sydnall Fields": 31231,
+            "12  Sydnall Fields": 31239,
+            "12  Athena Gardens": 28061,
+            "49  Banner Lane": 41189,
+            "4  Jonathan Road": 41232,
+            "8  Jonathan Road": 41236,
+            "1  Jonathan Road": 41229,
+            "96  Taunton Way": 31417,
+            "94  Taunton Way": 31418,
+            "1  Lady Lane": 29430,
+            "10  Jonathan Road": 41283,
+            "21  Jonathan Road": 41246,
+            "12  Ashcroft Close": 26399
+        }
+        for name, asset_id in missing_lookup.items():
+            wates_coordination["Asset ID_x"] = np.where(
+                wates_coordination["Name"] == name,
+                asset_id,
+                wates_coordination["Asset ID_x"]
+            )
+
+    ccs_coordination = ccs_coordination.merge(
+        ccs_matching_lookup, how="left", on="Name"
+    )
+
+    retrofit_packages_board = retrofit_packages_board.merge(
+        matching_lookup, how="left", on="Name"
+    )
+
+    # We combine this into a singular board
+    coordinated_packages = pd.concat(
+        [
+            retrofit_packages_board[
+                [
+                    "Name", "Postcode", 'Actual SAP Band', 'Actual SAP Rating',
+                    'Modelled SAP Band', 'Modelled SAP Rating', 'Package Ref',
+                    'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
+                    'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
+                    'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
+                    'Solar PV', 'Other measures', 'Organisation Reference',
+                ]
+            ],
+            ccs_coordination[
+                [
+                    # We don't have secondary wall insulation, Flat Roof, RIR, Heating Controls,
+                    # Solar PV
+                    "Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
+                    'SAP Band Install Package', 'Package Approved (Client)',
+                    'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
+                    'Ventilation', 'Heating', 'Other Measures', "Asset ID.1_y",
+                ]
+            ].rename(
+                columns={
+                    "SAP Band Pre": "Actual SAP Band",
+                    "SAP Rating Pre": "Actual SAP Rating",
+                    'SAP Rating Install Package': 'Modelled SAP Band',
+                    'SAP Band Install Package': 'Modelled SAP Rating',
+                    'Package Approved (Client)': 'Package Ref',
+                    'Wall Insulation': 'Main Wall Insulation',
+                    'Loft Insulation': 'Loft insulation',
+                    'Windows Upgrade': 'Window Upgrade',
+                    'Ext. Doors Upgrade': 'Door Upgrade',
+                    'Heating': 'Main Heating',
+                    'Other Measures': 'Other measures',
+                    'Asset ID.1_y': 'Organisation Reference',
+                }
+            ),
+            wates_coordination[
+                [
+                    "Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
+                    'SAP Band Install Package', 'Package Approved (Client)',
+                    'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
+                    'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x'
+
+                ]
+            ].rename(
+                columns={
+                    "SAP Band Pre": "Actual SAP Band",
+                    "SAP Rating Pre": "Actual SAP Rating",
+                    'SAP Rating Install Package': 'Modelled SAP Band',
+                    'SAP Band Install Package': 'Modelled SAP Rating',
+                    'Package Approved (Client)': 'Package Ref',
+                    'Wall Insulation': 'Main Wall Insulation',
+                    'Loft Insulation': 'Loft insulation',
+                    'Windows Upgrade': 'Window Upgrade',
+                    'Ext. Doors Upgrade': 'Door Upgrade',
+                    'Heating': 'Main Heating',
+                    'Other Measures': 'Other measures',
+                    'Asset ID_x': 'Organisation Reference',
+                }
+            )
+        ]
+    )
+
+    coordinated_packages["Organisation Reference"] = coordinated_packages["Organisation Reference"].astype(int)
+
+    # Merge the property features on
+    coordinated_packages = coordinated_packages.merge(
+        features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]],
+        how="left",
+        on="Organisation Reference"
+    )
+
+    # We need the features pertaining to these priority postcodes
+
+    def find_nearest_matching_property(coordinated_packages, home):
+        """
+        Find the rows of ``coordinated_packages`` that match ``home`` at the first filter level that succeeds.
+
+        The levels in ``filter_levels`` are tried in the order listed. For each level, a row is kept only if
+        it equals ``home`` on every column named in that level. The first level that leaves at least one row
+        is returned and later levels are not evaluated.
+
+        :param coordinated_packages: DataFrame that is indexed by every column named in ``filter_levels``
+        :param home: a single record indexable by the same column names (the caller passes a row from
+            ``iterrows()``)
+        :return: the non-empty DataFrame of matching rows, or None when no level yields a match
+        """
+        # Each entry is one set of columns that must all agree with `home`. Going down the list, the
+        # location column goes from "Postcode" to "Postal Region" to none, and later levels swap "Roofs"
+        # for "Roof Simple" and/or "Property Type" for "Primary Property Type".
+        filter_levels = [
+            ["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
+            ["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
+            ["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
+            ["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"],
+            ["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"],
+            ["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"],
+        ]
+
+        # NOTE(review): the enumerate index `i` is never read in the loop body.
+        for i, filters in enumerate(filter_levels):
+            # Start every level from the full, unfiltered frame.
+            match = coordinated_packages.copy()
+
+            # Narrow the candidates one column at a time; a row survives only if it equals `home` on all of them.
+            for col in filters:
+                match = match[match[col] == home[col]]
+
+            # First level with any surviving rows wins.
+            if not match.empty:
+                return match
+
+        return None  # No match found
+
+    coordinated_packages["Postal Region"] = coordinated_packages["Postcode"].str.split(" ").str[0].str.strip()
+    new_priority_postcodes["Postal Region"] = new_priority_postcodes["Postcode"].str.split(" ").str[0].str.strip()
+
+    coordinated_packages["Roof Simple"] = coordinated_packages["Roofs"].str.split(":").str[0].str.strip()
+    new_priority_postcodes["Roof Simple"] = new_priority_postcodes["Roofs"].str.split(":").str[0].str.strip()
+
+    coordinated_packages["Primary Property Type"] = coordinated_packages["Property Type"].str.split(":").str[0]
+    new_priority_postcodes["Primary Property Type"] = new_priority_postcodes["Property Type"].str.split(":").str[0]
+
+    # For every property in the priority postcodes data, we look for a most appropriate matching property
+    no_match = []
+    matches = []
+    for _, home in tqdm(new_priority_postcodes.iterrows(), total=len(new_priority_postcodes)):
+        closest_match = find_nearest_matching_property(coordinated_packages, home)
+        if closest_match is None:
+            no_match.append(home["Organisation Reference"])
+            continue
+
+        to_extend = [
+            {
+                "Organisation Reference": home["Organisation Reference"],
+                "Best Match Organisation Reference": m
+            } for m in closest_match["Organisation Reference"].values
+        ]
+        matches.extend(to_extend)
+
+    no_match_summary = new_priority_postcodes[
+        new_priority_postcodes["Organisation Reference"].isin(
+            no_match
+        )
+    ].groupby(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"])[
+        "Organisation Reference"].count().reset_index()
+
+    no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False)
+
+    # len(no_match)
+    # 8764, 5607
+    # no_match_summary.shape
+    # (3953, 6), (2948, 6)
+
+    # We match the properties to their closest match
+
+    matches_df = pd.DataFrame(matches)
+    matches_df = matches_df.merge(
+        coordinated_packages[["Organisation Reference", "Actual SAP Band", "Actual SAP Rating"]],
+        left_on="Best Match Organisation Reference", right_on="Organisation Reference",
+        suffixes=("", " - Closest Match")
+    )
+    # We want to aggregate the matches, when we have multiple
+    aggregated_matches_df = []
+    for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
+        if mapped_matches.shape[0] == 1:
+            mapped_matches["Number of matches"] = 1
+            mapped_matches["Proportion"]
+            aggregated_matches_df.append(mapped_matches)
+            continue
+
+    mapped_priority_list = new_priority_postcodes.merge(
+        matches_df, on="Organisation Reference",
+    )
+    # We merge on the EPC ratings for the matched properties
+    mapped_priority_list = mapped_priority_list.merge(
+
+    )

 # if __name__ == "__main__":
 #     main()