diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 53279eed..5e444ca8 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -741,26 +741,53 @@ def main(): # We now merge on the coordinator data so that against each property, we can map the measures retrofit_packages_board = pd.read_excel( - os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater_SHDF_3_0_Board_work_in_progress_- 22.10.24.xlsx"), + os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater 3.0 Updated SAP Pre & Modelled 29.10.24.xlsx"), header=4 ) retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])] + # Take just the rows that have been surveyed + retrofit_packages_board = retrofit_packages_board[ + retrofit_packages_board["RA"].isin(["Invoiced", "Completed"]) + ] + + # Replace \n with "" + extracted_data["Postcode"] = extracted_data["Postcode"].str.replace("\n", "") + + manual_filters = { + "Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD", + "6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG", + "1 Cluny Way": "12-1-1 Cluny Way-SG15 6ZB", + "2 Bromyard Road": "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ", + 'Flat 18, 1 Raglan Court': "StonewaterSurveys_13/60-3-18 Raglan Court, 1 Raglan Court-MK41 8QT", + '14 Raglan Court, 1 Devizes Avenue': 'StonewaterSurveys_12/55-3-14 Raglan Court, Devizes Avenue-MK41 8QT', + '19 South Road': 'StonewaterSurveys_4/19 The Oaks, South Road, SMETHWICK, B67 7BY', + 'Flat 12 Pelican Lane': 'StonewaterSurveys_1/121-3-Flat 12 Lynton Court, Pelican Lane-RG14 1NN', + 'Flat C, 44 St Leonards Avenue': 'StonewaterSurveys_11/427-2-44c St. Leonards Avenue-MK42 0RB', + } + # We now match this retrofit packages board to the extracted data matching_lookup = [] for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)): - # Handle the case that has the wrong postcode in the asset data - if home["Name"] == "Flat 21 Walmer Street": - filtered = extracted_data[ - extracted_data["survey_folder"] == "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD" - ].copy() + if home["Address ID"] == 6111566: + blah + # 6118117, 6118744, 6117091 + if home["Name"] in manual_filters: + filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy() else: filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()].copy() + filtered["survey_folder"].values - # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces - filtered = filtered[filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains( - home["Name"].replace(r"[^\w\s]", ""), case=False - )] + # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces + to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains( + home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False + ) + if to_filter.sum() == 0: + to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.replace(",", "").str.replace(".", + "").str.contains( + home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False + ) + filtered = filtered[to_filter] if filtered.empty: continue @@ -769,7 +796,7 @@ def main(): matching_lookup.append( { "survey_folder": filtered["survey_folder"].values[0], - "Osm. ID": home["Osm. ID"], + "Address ID": home["Address ID"], "Name": home["Name"] } ) @@ -797,15 +824,23 @@ def main(): matching_lookup.append( { "survey_folder": filtered["survey_folder"].values[0], - "Osm. ID": home["Osm. ID"], + "Address ID": home["Address ID"], "Name": home["Name"] } ) matching_lookup = pd.DataFrame(matching_lookup) # Find Osmosis IDs that are in the packages board but not in the matching looking - # missing_osm_ids = set(retrofit_packages_board["Osm. ID"]) - set(matching_lookup["Osm. ID"]) - # missing_osm_ids = list(missing_osm_ids) + missing_ids = set(retrofit_packages_board["Address ID"]) - set(matching_lookup["Address ID"]) + missing_ids = list(missing_ids) + print(len(missing_ids)) + if missing_ids: + # We check that the missing ids have no data yet + missing_data = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)][ + ["Name", "Address ID", "Archetype ID"]] + extracted_data[extracted_data["survey_folder"].str.contains("23 Monmouth")]["survey_folder"].values + + matching_lookup[matching_lookup["survey_folder"].str.contains("23 Monmouth")] if matching_lookup["Osm. ID"].duplicated().sum(): raise Exception("Duplicate Osm. IDs") @@ -834,7 +869,6 @@ def main(): retrofit_packages_board[ [ "Name", - "Osm. ID", "Address ID", "Archetype ID", "Arch. Group Rank", "Archetype Representative", @@ -848,6 +882,14 @@ def main(): how="left" ) + # Create a section for costs + for measure in measure_columns: + stonewater_data[f"Cost of {measure}"] = None + + stonewater_data["Total Cost of Measures"] = None + stonewater_data["Contingency Cost"] = None + stonewater_data["Total Cost of Measures inc Contingency"] = None + # We've appended the recommended packages and modelled SAP ratings to the data # We also want to append the windows data windows_data = pd.read_excel( @@ -878,12 +920,8 @@ def main(): windows_data["Years since fitted/renewed"] = (pd.Timestamp.now() - windows_data[ "Fitted/renewed date"]).dt.days / 365 - # TODO: Flag if a package includes windows - - # Save this as a csv - # extracted_data.to_csv("Wave 3 Summary Data - first 200 files.csv", index=False) - - missed = [f for f in survey_folders if f not in extracted_data["survey_folder"].tolist()] + stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"]) + stonewater_data = stonewater_data.merge(windows_data, on="Osm. ID", how="left") # if __name__ == "__main__": # main()