working on proposed sample for stonewater

2026-07-27 23:35:01 +00:00 · 2024-10-30 20:30:05 +00:00 · 2024-10-30 20:30:05 +00:00 · 7e26fb4b86
commit 7e26fb4b86
parent bccf3c621b
1 changed files with 201 additions and 2 deletions
--- a/etl/customers/stonewater/Wave
+++ b/etl/customers/stonewater/Wave
@ -486,7 +486,7 @@ def extract_epr(pdf_path):
        data["Postcode"] = data["Address"].split(",")[-1].strip()

        # Extract Current and Potential SAP ratings
-        sap_match = re.search(r"GG \(1-20\)(\d{1,2})(\d{1,2})", text)
+        sap_match = re.search(r"GG \(1-20\)\s*(\d{1,2})\s*(\d{1,2})", text)
        current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2))
        data["Current SAP Rating"] = current_sap

@ -896,7 +896,6 @@ def main():
    # Find Osmosis IDs that are in the packages board but not in the matching looking
    missing_ids = set(retrofit_packages_board["Address ID"]) - set(matching_lookup["Address ID"])
    missing_ids = list(missing_ids)
-    print(len(missing_ids))
    if missing_ids:
        # We check that the missing ids have no data yet
        if len(missing_ids) != 8:
@ -937,6 +936,7 @@ def main():
                "Actual SAP Rating",
                "Modelled SAP Band",
                "Modelled SAP Rating",
+                "Package Ref",
            ] + measure_columns
            ],
        on=["Address ID", "Name"],
@ -995,7 +995,206 @@ def main():
    if stonewater_data["Address ID"].duplicated().sum():
        raise Exception("Duplicate Address IDs")

+    # Save this data to excel
+    stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages.xlsx", index=False)
+
+    cost_sheet = [
+        {
+            "measure": "EWI 0.30 w.m2.K", "cost": 298.35, "unit": "m2"
+        },
+        {
+            "measure": "CWI RdSAP Default", "cost": 14.21, "unit": "m2"
+        },
+        {
+            "measure": "Poss Extract CWI & Refill (issues identified)", "cost": 14.21 + 25, "unit": "m2"
+        },
+        {
+            "measure": "IWI 0.30 w.m2.K", "cost": 244.80, "unit": "m2"
+        },
+        {
+            "measure": "EWI/IWI 0.3", "cost": (298.35 + 244.8) / 2, "unit": "m2"
+        },
+        {
+            "measure": "Loft Insulation 0.11 w.m2.K", "cost": 16.07, "unit": "m2"
+        },
+        {
+            "measure": "Flat Roof 0.11 w.m2.K", "cost": 195, "unit": "m2"
+        },
+        {
+            "measure": "DG Window 1.30 w.m2.K", "cost": 1140, "unit": "each"
+        },
+        {
+            "measure": "Secondary 2.40", "cost": 974, "unit": "each"
+        },
+        {
+            "measure": "Ins. Door 1.30 w.m2.K", "cost": None, "unit": "each"
+        },
+        {
+            "measure": "Ins. Door 1.40 w.m2.K", "cost": None, "unit": "each"
+        },
+        {
+            "measure": "DMEV", "cost": 900, "unit": "each"
+        },
+        {
+            "measure": "ASHP Vaillant 102607 5kw", "cost": None, "unit": "each"
+        },
+        {
+            "measure": "HHRSH Quantum 150", "cost": None, "unit": "each"
+        },
+        {
+            "measure": "Dual Stat Tank 210lt 50mm Foam", "cost": None, "unit": "each"
+        },
+        {
+            "measure": "Dual Stat Tank 160lt 50mm Foam", "cost": None, "unit": "each"
+        },
+        {
+            "measure": "Dual Stat Tank 110lt 50mm Foam", "cost": None, "unit": "each"
+        },
+        {
+            "measure": "Smart Thermostat", "cost": 1200, "unit": "each"
+        },
+        {
+            "measure": "TRV's", "cost": 350, "unit": "each"
+        },
+        {
+            "measure": "Solar PV - 3.0kwp", "cost": 4365.0, "unit": "each"
+        },
+        {
+            "measure": "Solar PV - 1.5kwp", "cost": 3881, "unit": "each"
+        },
+        {
+            "measure": "LEL", "cost": 35, "unit": "per bulb"
+        },
+        {
+            "measure": "Roof 0.16 - Walls 0.30", "cost": 180, "unit": "floor area m2"
+        },
+        {
+            "measure": "Roof 0.16 - Walls 0.16", "cost": 180, "unit": "floor area m2"
+        },
+    ]
+    cost_sheet = pd.DataFrame(cost_sheet)
+
+    # Save cost sheet - ideally this will be used as a secondary sheet for Stonewater
+    cost_sheet.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - cost sheet.xlsx", index=False)
+
+    stonewater_data["Room in Roof"].value_counts()
+
    # stonewater_data[~pd.isnull(stonewater_data["Room in Roof"])]["survey_folder"].values

+    create_proposed_wave_3_bid(
+        costed_packages_filepath=os.path.join(
+            CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP).xlsx"
+        ),
+        archetypes_sheet_filepath=os.path.join(
+            CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
+        )
+    )
+
+
+def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepath):
+    # Build the proposed Wave 3 bid sample from two Excel workbooks (paths given) and save it; returns nothing.
+    costed_packages = pd.read_excel(costed_packages_filepath)  # read in the costed packages
+    # Keep only the identifying, SAP/EPC and cost columns needed below.
+    archetypes_to_cost = costed_packages[
+        [
+            "Name", "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Modelled SAP Band",
+            "Modelled SAP Rating", 'Total Cost of Measures', 'Contingency Cost',
+            'Total Cost of Measures inc Contingency'
+        ]
+    ].copy()
+
+    # We take properties that are EPC D and below (61% of units, per the original author's note)
+    archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])]
+    # A property counts as modelled when it has a Modelled SAP Band value.
+    archetypes_to_cost["Has been modelled"] = ~pd.isnull(archetypes_to_cost["Modelled SAP Band"])
+    # Mean cost (inc contingency) over modelled properties only; it is printed and not used again below.
+    average_cost = archetypes_to_cost[
+        archetypes_to_cost["Has been modelled"]
+    ]['Total Cost of Measures inc Contingency'].mean()
+    print(average_cost)
+    # These are the Archetypes that will likely be suitable for Wave 3.
+    # Rows with no Address ID, or with the header text repeated in that column, are dropped before the int cast.
+    archetypes_sheet = pd.read_excel(archetypes_sheet_filepath, header=4)  # column names are on row index 4
+    archetypes_sheet = archetypes_sheet[~pd.isnull(archetypes_sheet["Address ID"])]
+    archetypes_sheet = archetypes_sheet[archetypes_sheet["Address ID"] != "Address ID"]
+    archetypes_sheet["Address ID"] = archetypes_sheet["Address ID"].astype(int)
+
+    # We merge the property details onto the costed archetypes (left join: unmatched Address IDs keep NaN details)
+    archetypes_to_cost = archetypes_to_cost.merge(
+        archetypes_sheet[["Address ID", "Property Type", "Wall Type", "Roof Type", "Heating"]],
+        on="Address ID",
+        how="left"
+    )
+    # The sample is every home on the archetypes sheet whose Archetype ID appears among the costed properties.
+    proposed_sample = archetypes_sheet[archetypes_sheet["Archetype ID"].isin(archetypes_to_cost["Archetype ID"])]
+
+    proposed_sample = proposed_sample[
+        [
+            "Name", "Postcode", "UPRN", "UDPRN", "Address ID", "Osm. ID", "Archetype ID",
+            "Property Type", "Wall Type", "Roof Type", "Heating"
+        ]
+    ]
+
+    # We classify each home by how closely it matches a costed property of the same archetype:
+    # "Exact" when Property Type, Wall Type, Roof Type and Heating all agree with one, otherwise "Approximate".
+    match_classification = []
+    for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)):
+        surveyed = archetypes_to_cost[archetypes_to_cost["Archetype ID"] == home["Archetype ID"]]
+        # We now check if we have a perfect match on all four property attributes
+        surveyed = surveyed[
+            (surveyed["Property Type"] == home["Property Type"]) &
+            (surveyed["Wall Type"] == home["Wall Type"]) &
+            (surveyed["Roof Type"] == home["Roof Type"]) &
+            (surveyed["Heating"] == home["Heating"])
+            ]
+        # NOTE(review): a missing (NaN) attribute never compares equal, so such homes end up "Approximate".
+        if surveyed.empty:
+            match_classification.append(
+                {
+                    "Address ID": home["Address ID"],
+                    "Match to Surveyed": "Approximate"
+                }
+            )
+            continue
+        match_classification.append(
+            {
+                "Address ID": home["Address ID"],
+                "Match to Surveyed": "Exact"
+            }
+        )
+
+    match_classification = pd.DataFrame(match_classification)
+    # NOTE(review): with an empty sample this frame has no "Address ID" column and the merge below would fail.
+    proposed_sample = proposed_sample.merge(
+        match_classification,
+        on="Address ID",
+        how="left",
+    )
+
+    # Merge on the mean cost (inc contingency) per archetype
+    cost_per_archetype = (
+        archetypes_to_cost.groupby("Archetype ID")[['Total Cost of Measures inc Contingency']].mean().reset_index()
+    )
+    proposed_sample = proposed_sample.merge(
+        cost_per_archetype,
+        on="Archetype ID",
+        how="left"
+    )
+
+    # We add on a boolean to indicate if a property from that archetype has been modelled
+    proposed_sample = proposed_sample.merge(
+        archetypes_to_cost.groupby("Archetype ID")[["Has been modelled"]].any().reset_index(),
+        on="Archetype ID",
+        how="left"
+    )
+    # Blank out the cost for archetypes where no property has been modelled.
+    proposed_sample["Total Cost of Measures inc Contingency"] = np.where(
+        ~proposed_sample["Has been modelled"],
+        None, proposed_sample["Total Cost of Measures inc Contingency"]
+    )
+
+    # Save the proposed sample to Excel under CUSTOMER_FOLDER_PATH (row index not written)
+    proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid (WIP).xlsx", index=False)
+
 # if __name__ == "__main__":
 #     main()