From 04eba60961b0ea215701b2b35feaed74f9a5ef11 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 3 Feb 2025 13:04:10 +0000 Subject: [PATCH] fixing cleaning for stonewater --- .../stonewater/Wave 3 Preparation.py | 58 +++++++++++-------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 5c4da35b..04078e47 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -3194,7 +3194,32 @@ def revised_model(): "12005", wates_coordination["Asset ID"] ) - + + # We fill the missing ids + missing_lookup = { + "4 Sydnall Fields": 31231, + "12 Sydnall Fields": 31239, + "12 Athena Gardens": 28061, + "49 Banner Lane": 41189, + "4 Jonathan Road": 41232, + "8 Jonathan Road": 41236, + "1 Jonathan Road": 41229, + "96 Taunton Way": 31417, + "94 Taunton Way": 31418, + "1 Lady Lane": 29430, + "10 Jonathan Road": 41283, + "21 Jonathan Road": 41246, + "12 Ashcroft Close": 26399 + } + for name, asset_id in missing_lookup.items(): + wates_coordination["Asset ID_x"] = np.where( + wates_coordination["Name"] == name, + asset_id, + wates_coordination["Asset ID_x"] + ) + + wates_coordination = wates_coordination[~pd.isnull(wates_coordination["Asset ID"])] + wates_coordination["folder_path"] = wates_coordination["Sharepoint Folder"].apply( lambda x: extract_sharepoint_url(x) ) @@ -3412,6 +3437,7 @@ def revised_model(): # We get a match for all records assert ccs_matching_lookup.shape[0] == ccs_coordination.shape[0] assert not pd.isnull(ccs_matching_lookup["Asset ID.1"]).sum() + assert not ccs_matching_lookup["Asset ID.1"].duplicated().sum() # We do the same for Wates wates_coordination = wates_coordination.rename( @@ -3447,6 +3473,8 @@ def revised_model(): '3 Chanders Rd', '1 Orchard Close', ] + wates_coordination = wates_coordination[~wates_coordination["Name"].isin(wates_to_skip)] + for _, home in tqdm(wates_coordination.iterrows(), total=len(wates_coordination)): # Search the folder @@ -3547,34 +3575,18 @@ def revised_model(): raise Exception("No match") wates_matching_lookup = pd.DataFrame(wates_matching_lookup) + # We get a match for all records + assert wates_matching_lookup.shape[0] == wates_coordination.shape[0] + assert not pd.isnull(wates_matching_lookup["Asset ID"]).sum() + assert not wates_matching_lookup["Asset ID"].duplicated().sum() + # Merge lookup tables onto the coordination sheets wates_coordination = wates_coordination.merge( wates_matching_lookup, how="left", on="Name" ) missed_asset_id = wates_coordination[pd.isnull(wates_coordination["Asset ID_x"])] if not missed_asset_id.empty: - # We fill the missing ids - missing_lookup = { - "4 Sydnall Fields": 31231, - "12 Sydnall Fields": 31239, - "12 Athena Gardens": 28061, - "49 Banner Lane": 41189, - "4 Jonathan Road": 41232, - "8 Jonathan Road": 41236, - "1 Jonathan Road": 41229, - "96 Taunton Way": 31417, - "94 Taunton Way": 31418, - "1 Lady Lane": 29430, - "10 Jonathan Road": 41283, - "21 Jonathan Road": 41246, - "12 Ashcroft Close": 26399 - } - for name, asset_id in missing_lookup.items(): - wates_coordination["Asset ID_x"] = np.where( - wates_coordination["Name"] == name, - asset_id, - wates_coordination["Asset ID_x"] - ) + raise Exception("Missing Asset ID") ccs_coordination = ccs_coordination.merge( ccs_matching_lookup, how="left", on="Name"