diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index ccd062e2..bfdc8beb 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -486,7 +486,7 @@ def extract_epr(pdf_path): data["Postcode"] = data["Address"].split(",")[-1].strip() # Extract Current and Potential SAP ratings - sap_match = re.search(r"GG \(1-20\)(\d{1,2})(\d{1,2})", text) + sap_match = re.search(r"GG \(1-20\)\s*(\d{1,2})\s*(\d{1,2})", text) current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2)) data["Current SAP Rating"] = current_sap @@ -896,7 +896,6 @@ def main(): # Find Osmosis IDs that are in the packages board but not in the matching looking missing_ids = set(retrofit_packages_board["Address ID"]) - set(matching_lookup["Address ID"]) missing_ids = list(missing_ids) - print(len(missing_ids)) if missing_ids: # We check that the missing ids have no data yet if len(missing_ids) != 8: @@ -937,6 +936,7 @@ def main(): "Actual SAP Rating", "Modelled SAP Band", "Modelled SAP Rating", + "Package Ref", ] + measure_columns ], on=["Address ID", "Name"], @@ -995,7 +995,206 @@ def main(): if stonewater_data["Address ID"].duplicated().sum(): raise Exception("Duplicate Address IDs") + # Save this data to excel + stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages.xlsx", index=False) + + cost_sheet = [ + { + "measure": "EWI 0.30 w.m2.K", "cost": 298.35, "unit": "m2" + }, + { + "measure": "CWI RdSAP Default", "cost": 14.21, "unit": "m2" + }, + { + "measure": "Poss Extract CWI & Refill (issues identified)", "cost": 14.21 + 25, "unit": "m2" + }, + { + "measure": "IWI 0.30 w.m2.K", "cost": 244.80, "unit": "m2" + }, + { + "measure": "EWI/IWI 0.3", "cost": (298.35 + 244.8) / 2, "unit": "m2" + }, + { + "measure": "Loft Insulation 0.11 w.m2.K", "cost": 16.07, "unit": "m2" + }, + { + "measure": "Flat Roof 0.11 w.m2.K", "cost": 195, "unit": "m2" + }, + { + 
def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepath):
    """Build the proposed Wave 3 bid sample and save it as an Excel workbook.

    The costed (surveyed) packages are restricted to properties currently in
    EPC band D-G. Every property in the archetypes sheet whose "Archetype ID"
    appears among those surveyed properties becomes part of the proposed
    sample. Each sampled property is then:

    * classified as an "Exact" match when a surveyed property shares its
      archetype, property type, wall type, roof type and heating, otherwise
      "Approximate";
    * given the mean "Total Cost of Measures inc Contingency" of the surveyed
      properties in its archetype, blanked out when no property of that
      archetype has a modelled SAP band.

    The result is written to
    ``CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid (WIP).xlsx"``.

    Args:
        costed_packages_filepath: Path to the costed retrofit packages
            workbook (read with ``pd.read_excel`` using default options).
        archetypes_sheet_filepath: Path to the archetypes workbook; its
            column header is read from row index 4 (``header=4``).

    Returns:
        None. The function prints the average cost of modelled properties and
        writes the Excel file as side effects.
    """
    cost_column = "Total Cost of Measures inc Contingency"
    detail_columns = ["Property Type", "Wall Type", "Roof Type", "Heating"]
    match_columns = ["Archetype ID"] + detail_columns

    # We read in the costed packages
    costed_packages = pd.read_excel(costed_packages_filepath)

    archetypes_to_cost = costed_packages[
        [
            "Name", "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Modelled SAP Band",
            "Modelled SAP Rating", "Total Cost of Measures", "Contingency Cost",
            cost_column,
        ]
    ]

    # We take properties that are EPC D and below (61% of units).
    # .copy() so the column added below is written to an independent frame.
    archetypes_to_cost = archetypes_to_cost[
        archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])
    ].copy()

    archetypes_to_cost["Has been modelled"] = ~pd.isnull(archetypes_to_cost["Modelled SAP Band"])

    average_cost = archetypes_to_cost.loc[archetypes_to_cost["Has been modelled"], cost_column].mean()
    print(average_cost)

    # These are the Archetypes that will likely be suitable for Wave 3
    archetypes_sheet = pd.read_excel(archetypes_sheet_filepath, header=4)
    archetypes_sheet = archetypes_sheet[~pd.isnull(archetypes_sheet["Address ID"])]
    # Drop rows where the cell literally contains the text "Address ID"
    # (presumably repeated header rows inside the sheet - confirm against the workbook).
    archetypes_sheet = archetypes_sheet[archetypes_sheet["Address ID"] != "Address ID"].copy()
    archetypes_sheet["Address ID"] = archetypes_sheet["Address ID"].astype(int)

    # We merge the property details onto the costed archetypes
    archetypes_to_cost = archetypes_to_cost.merge(
        archetypes_sheet[["Address ID"] + detail_columns],
        on="Address ID",
        how="left",
    )

    proposed_sample = archetypes_sheet[archetypes_sheet["Archetype ID"].isin(archetypes_to_cost["Archetype ID"])]

    proposed_sample = proposed_sample[
        [
            "Name", "Postcode", "UPRN", "UDPRN", "Address ID", "Osm. ID", "Archetype ID",
            "Property Type", "Wall Type", "Roof Type", "Heating",
        ]
    ].copy()

    # We classify into high and low confidence.
    # Build the set of surveyed attribute combinations once instead of
    # re-filtering the surveyed table for every home. Rows with a missing
    # attribute are excluded because a missing value never compares equal,
    # so such rows can never produce an exact match.
    surveyed_keys = set(
        archetypes_to_cost[match_columns].dropna().itertuples(index=False, name=None)
    )
    # Assign the label directly (one per row). This works for an empty sample
    # and cannot multiply rows when an Address ID appears more than once.
    proposed_sample["Match to Surveyed"] = [
        "Exact" if home_key in surveyed_keys else "Approximate"
        for home_key in proposed_sample[match_columns].itertuples(index=False, name=None)
    ]

    surveyed_by_archetype = archetypes_to_cost.groupby("Archetype ID")

    # Merge on the cost per archetype
    proposed_sample = proposed_sample.merge(
        surveyed_by_archetype[[cost_column]].mean().reset_index(),
        on="Archetype ID",
        how="left",
    )

    # We add on a boolean to indicate if a property from that archetype has been modelled
    proposed_sample = proposed_sample.merge(
        surveyed_by_archetype[["Has been modelled"]].any().reset_index(),
        on="Archetype ID",
        how="left",
    )

    # Blank the cost where nothing in the archetype has been modelled.
    # .eq(True) treats a missing flag (no group found by the left merge) as
    # "not modelled" instead of failing on the boolean negation.
    archetype_modelled = proposed_sample["Has been modelled"].eq(True)
    proposed_sample[cost_column] = np.where(
        ~archetype_modelled,
        None, proposed_sample[cost_column]
    )

    # Save excel
    proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid (WIP).xlsx", index=False)


# if __name__ == "__main__":
#     main()