From dfa37f86d469d4ee926ee0dc2438629fb35e17cc Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 12 Nov 2024 15:49:28 +0000 Subject: [PATCH] Adding postcode summary to stonewater --- .../stonewater/Wave 3 Preparation.py | 79 +++++++++++++++---- 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 0036a0a4..889d8f88 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -916,13 +916,14 @@ def main(): "18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX', '3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX', '16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ', - '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX' + '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX', + '7 Croft Street': 'StonewaterSurveys_8/333-2-7 Croft Street-HR6 8LA' } # We now match this retrofit packages board to the extracted data matching_lookup = [] for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)): - + # Handle the case that has the wrong postcode in the asset data if home["Name"] in manual_filters: filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy() @@ -986,11 +987,11 @@ def main(): missing_ids = list(missing_ids) if missing_ids: # We check that the missing ids have no data yet - missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)] - missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv( - CUSTOMER_FOLDER_PATH + "/missed_debugging.csv") + # missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)] + # missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv( + # CUSTOMER_FOLDER_PATH + "/missed_debugging.csv") - if len(missing_ids) != 8: + if len(missing_ids) != 6: raise Exception("Unacceptable number of missings") if matching_lookup["Address ID"].duplicated().sum(): @@ -1083,12 +1084,20 @@ def main(): stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"]) windows_data["Address ID"] = windows_data["Address ID"].astype(float) stonewater_data = stonewater_data.merge(windows_data, on="Address ID", how="left") + stonewater_data = stonewater_data.sort_values("Archetype ID", ascending=True) if stonewater_data["Address ID"].duplicated().sum(): raise Exception("Duplicate Address IDs") + for c in [ + 'Window attributes - Fitted/renewed date', + 'Parent Asset Window attributes - Fitted/renewed date', + 'Fitted/renewed date' + ]: + stonewater_data[c] = stonewater_data[c].astype(str) + # Save this data to excel - stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages.xlsx", index=False) + stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V2.xlsx", index=False) cost_sheet = [ { @@ -1173,7 +1182,7 @@ def main(): create_proposed_wave_3_bid( costed_packages_filepath=os.path.join( - CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) MR Review v1.xlsx" + CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) Single Model V3.xlsx" ), archetypes_sheet_filepath=os.path.join( CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx" @@ -1183,8 +1192,8 @@ def main(): def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepath): # We read in the costed packages - # Note: Header as 12 is for Matt Ratcliff's reviewed version costed_packages = pd.read_excel(costed_packages_filepath, header=13, sheet_name="Modelled Packages") + costed_packages = costed_packages[~pd.isnull(costed_packages["Address"])] archetypes_to_cost = costed_packages[ [ @@ -1213,16 +1222,11 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa 'Existing Primary Heating System', 'Existing Primary Heating PCDF Reference']) - # We take properties that are EPC D and below (61% of units) + # We take properties that are EPC D and below (59% of units) archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])] archetypes_to_cost["Has been modelled"] = ~pd.isnull(archetypes_to_cost["Modelled SAP Band"]) - average_cost = archetypes_to_cost[ - archetypes_to_cost["Has been modelled"] - ]['Total Cost of Measures inc Contingency'].mean() - print(average_cost) - # These are the Arhetypes that will likely be suitable for Wave 3 archetypes_sheet = pd.read_excel(archetypes_sheet_filepath, header=4) archetypes_sheet = archetypes_sheet[~pd.isnull(archetypes_sheet["Address ID"])] @@ -1236,7 +1240,21 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa how="left" ) - proposed_sample = archetypes_sheet[archetypes_sheet["Archetype ID"].isin(archetypes_to_cost["Archetype ID"])] + proposed_sample = archetypes_sheet[ + archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str)) + ] + + not_proposed = archetypes_sheet[ + ~archetypes_sheet["Archetype ID"].astype(str).isin(archetypes_to_cost["Archetype ID"].astype(int).astype(str)) + ] + + # archetypes_without_survey = [] + # for p in list(set(not_proposed)): + # filtered = costed_packages[costed_packages["Archetype ID"].astype(int).astype(str) == p] + # if filtered.empty: + # archetypes_without_survey.append(p) + + # Can we propose anything about archetypes that were not surveyed? proposed_sample = proposed_sample[ [ @@ -1247,6 +1265,8 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa # We classify into high and low confidence + archetypes_to_cost["Surveyed Main Roof"] = archetypes_to_cost["Surveyed Main Roof"].fillna("") + match_classification = [] for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)): @@ -1331,8 +1351,33 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa None, proposed_sample["Total Cost of Measures inc Contingency"] ) + proposed_sample = proposed_sample.sort_values("Archetype ID", ascending=True) + # Save excel - proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid (WIP).xlsx", index=False) + proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid V2 (WIP).xlsx", index=False) + + # For each postcode that's in the bid, we also summarise the number of units in the bid and number left out + proposed_sample_postcodes = proposed_sample["Postcode"].unique() + + postcode_summary = [] + for postcode in proposed_sample_postcodes: + in_proposal = proposed_sample[proposed_sample["Postcode"] == postcode] + not_in_proposal = not_proposed[not_proposed["Postcode"] == postcode] + postcode_summary.append( + { + "Postcode": postcode, + "Number of properties in Proposal": len(in_proposal), + "Number of properties not in Proposal": len(not_in_proposal) + } + ) + postcode_summary = pd.DataFrame(postcode_summary) + postcode_summary = postcode_summary.sort_values( + "Number of properties not in Proposal", + ascending=False).reset_index(drop=True) + + postcode_summary.to_excel( + CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid Postcode Summary.xlsx", index=False + ) def find_remaining_surveys():