diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 4db089e7..904afd30 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -3077,7 +3077,280 @@ def revised_model(): extracted_data.append(summary_data) retrofit_assessment_data = pd.DataFrame(extracted_data) - # TODO - Save this data + + # Remove some definite duplicates + dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].duplicated()]["Address"] + dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].isin(dupes)] + dupes = dupes.sort_values("Address") + # Get all of the folders that end with ROSS + to_drop = dupes[dupes["survey_folder"].str.endswith("ROSS")]["survey_folder"].unique().tolist() + + retrofit_assessment_data = retrofit_assessment_data[ + ~retrofit_assessment_data["survey_folder"].isin( + [ + "StonewaterSurveys_10/4 Beech Road, LUTON, LU1 1DP ROSS", + "StonewaterSurveys_2/135 Runley Road, LUTON, LU1 1TX ROSS", + "StonewaterSurveys_13/7 Saxon Road, LUTON, LU3 1JR ROSS" + ] + to_drop + ) + ] + # Replace \n with "" + retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "") + + # retrofit_assessment_data.to_csv( + # os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet.csv"), index=False + # ) + + # We can read in the data as needed + + # Next Step: Read in the coordinated measures and match to the extracted data + ############################################################ + # CCS + ############################################################# + ccs_coordination_sheet = pd.read_excel( + os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "CCS_Installation_Compliance_CCS.xlsx"), + header=4 + ) + ccs_coordination_sheet["contractor"] = "CCS" + # We split ccs into two sections - the first being + ccs_coordination_removed_from_programme = ccs_coordination_sheet.tail(21) + ccs_coordination_sheet = ccs_coordination_sheet.head(87) + ccs_coordination = pd.concat([ccs_coordination_removed_from_programme, ccs_coordination_sheet]) + + ############################################################ + # WATES + ############################################################# + wates_coordination_sheet = pd.read_excel( + os.path.join( + CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_Vinci-Wates.xlsx" + ), + header=4 + ) + wates_coordination_sheet["contractor"] = "Wates" + # Break into the different sites: + # Wiltshire + wates_coordination_sheet_wiltshere = wates_coordination_sheet.head(267) + wates_coordination_sheet_herefordshire = wates_coordination_sheet.iloc[271:332, :] + wates_coordination_sheet_coventry = wates_coordination_sheet.iloc[336:409, :] + wates_coordination_sheet_bedfordshire = wates_coordination_sheet.iloc[413:520, :] + wates_coordination_sheet_bournemouth = wates_coordination_sheet.iloc[524:567, :] + wates_coordination_sheet_cambridgeshire = wates_coordination_sheet.iloc[571:581, :] + wates_coordination_sheet_removed_from_programme = wates_coordination_sheet.iloc[586:926, :] + wates_coordination_sheet_abeyance = wates_coordination_sheet.iloc[928:972, :] + + wates_coordination = pd.concat( + [ + wates_coordination_sheet_wiltshere, + wates_coordination_sheet_herefordshire, + wates_coordination_sheet_coventry, + wates_coordination_sheet_bedfordshire, + wates_coordination_sheet_bournemouth, + wates_coordination_sheet_cambridgeshire, + wates_coordination_sheet_removed_from_programme, + wates_coordination_sheet_abeyance + ] + ) + + # Combine the data back + + ############################################################ + # NEW 450 COORDINATED RETROFIT ASSESSMENTS + ############################################################# + + retrofit_packages_board = pd.read_excel( + os.path.join( + CUSTOMER_FOLDER_PATH, + "Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1732034933 Final 19.11.24.xlsx" + ), + header=4 + ) + retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])] + # Take just the rows that have been surveyed + retrofit_packages_board = retrofit_packages_board[ + retrofit_packages_board["RA"].isin(["Invoiced", "Completed"]) + ] + + manual_filters = { + "Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD", + "6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG", + "2 Bromyard Road": "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ", + 'Flat 18, 1 Raglan Court': "StonewaterSurveys_13/60-3-18 Raglan Court, 1 Raglan Court-MK41 8QT", + '14 Raglan Court, 1 Devizes Avenue': 'StonewaterSurveys_12/55-3-14 Raglan Court, Devizes Avenue-MK41 8QT', + '19 South Road': 'StonewaterSurveys_4/19 The Oaks, South Road, SMETHWICK, B67 7BY', + 'Flat 12 Pelican Lane': 'StonewaterSurveys_1/121-3-Flat 12 Lynton Court, Pelican Lane-RG14 1NN', + 'Flat C, 44 St Leonards Avenue': 'StonewaterSurveys_11/427-2-44c St. Leonards Avenue-MK42 0RB', + '16 The Crescent, Kington': 'StonewaterSurveys_9/360-3-16 The Crescent-HR5 3AS', + '2 School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA', + '14 South Road': 'StonewaterSurveys_2/14 The Oaks, South Road, SMETHWICK, B67 7BY', + '1 Groves Street': 'StonewaterSurveys_4/19-5-1 Groves Street-SN2 2BW', + '2 Calshot Walk': 'StonewaterSurveys_3/156-3-2 Calshot Walk-MK41 8QS', + '21 Constitution Hill': 'StonewaterSurveys_1/112-11-21 Constitution Hill-BH14 0PX', + '22 Constitution Hill': 'StonewaterSurveys_4/185-8-22 Constitution Hill-BH14 0PX', + '2 Marches Cottages, School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA', + '26, Copthorn House, Brighton Road': 'StonewaterSurveys_15/133-1-26 Brighton Road-KT20 6BQ', + '4, Old St Marys, Ripley Lane': "StonewaterSurveys_15/433-3-4 Ripley Lane-KT24 6JG", + '1 Nelson House, Short Street': 'StonewaterSurveys_15/89-2-1 Short Street-GU11 1HX', + "18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX', + '3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX', + '16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ', + '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX', + '7 Croft Street': 'StonewaterSurveys_8/333-2-7 Croft Street-HR6 8LA' + } + + # We now match this retrofit packages board to the extracted data + matching_lookup = [] + for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)): + + # Handle the case that has the wrong postcode in the asset data + if home["Name"] in manual_filters: + filtered = retrofit_assessment_data[ + retrofit_assessment_data["survey_folder"] == manual_filters[home["Name"]] + ].copy() + else: + filtered = retrofit_assessment_data[ + retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower() + ].copy() + + # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces + to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains( + home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False + ) + if to_filter.sum() == 0: + to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.replace(",", "").str.replace(".", + "").str.contains( + home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False + ) + filtered = filtered[to_filter] + + if filtered.empty: + continue + + if filtered.shape[0] == 1: + matching_lookup.append( + { + "survey_folder": filtered["survey_folder"].values[0], + "Address ID": home["Address ID"], + "Name": home["Name"] + } + ) + continue + + # home["Name"] should be contained in the survey_folder + filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)] + # We have an edge case wher some properties have two outputs in Sharepoint + if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ": + raise Exception("Fix me1") + # filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"] + + if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB': + raise Exception("Fix me2") + # filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"] + + if home["Name"] == '2 Bromyard Road' and home["Postcode"] == 'WR15 8BZ': + filtered = filtered[filtered["survey_folder"] == "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ"] + + if filtered.empty: + continue + if filtered.shape[0] != 1: + raise Exception("something went wrong") + + matching_lookup.append( + { + "survey_folder": filtered["survey_folder"].values[0], + "Address ID": home["Address ID"], + "Name": home["Name"] + } + ) + matching_lookup = pd.DataFrame(matching_lookup) + + ccs_coordination = ccs_coordination.rename( + columns={"Post Code": "Postcode"} + ) + ccs_coordination = ccs_coordination[~pd.isnull(ccs_coordination["Postcode"])] + from fuzzywuzzy import fuzz + + ccs_manual_filters = {} + ccs_matching_lookup = [] + for _, home in tqdm(ccs_coordination.iterrows(), total=len(ccs_coordination)): + # Handle the case that has the wrong postcode in the asset data + if home["Name"] in manual_filters: + filtered = retrofit_assessment_data[ + retrofit_assessment_data["survey_folder"] == manual_filters[home["Name"]] + ].copy() + else: + filtered = retrofit_assessment_data[ + retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower() + ].copy() + + # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces + to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains( + home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False + ) + if to_filter.sum() == 0: + to_filter = ( + filtered["survey_folder"]. + str.replace(r"[^\w\s]", ""). + str.replace(",", ""). + str.replace(".", ""). + str.contains( + home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False + ) + ) + if to_filter.sum() == 0: + to_filter = filtered["Address"].str.split(",").str[0:2].str.join("") == home["Name"] + + if to_filter.sum() == 0: + # Do a fuzzy match on the name + # Find the best filter + to_filter = filtered["Address"].str.split(",").str[0:2].str.join("").apply( + lambda x: fuzz.partial_ratio(home["Name"], x) > 9 + ) + + if to_filter.sum() == 0: + blah + filtered = filtered[to_filter] + + if filtered.empty: + continue + + if filtered.shape[0] == 1: + ccs_matching_lookup.append( + { + "survey_folder": filtered["survey_folder"].values[0], + "Asset ID.1": home["Asset ID.1"], + "Name": home["Name"] + } + ) + continue + + blah2 + + # home["Name"] should be contained in the survey_folder + # filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)] + # # We have an edge case wher some properties have two outputs in Sharepoint + # if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ": + # raise Exception("Fix me1") + # # filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"] + # + # if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB': + # raise Exception("Fix me2") + # # filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"] + # + # if home["Name"] == '2 Bromyard Road' and home["Postcode"] == 'WR15 8BZ': + # filtered = filtered[filtered["survey_folder"] == "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ"] + # + # if filtered.empty: + # continue + # if filtered.shape[0] != 1: + # raise Exception("something went wrong") + # + # matching_lookup.append( + # { + # "survey_folder": filtered["survey_folder"].values[0], + # "Address ID": home["Address ID"], + # "Name": home["Name"] + # } + # ) # if __name__ == "__main__": # main()