diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 5e444ca8..67362865 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -756,27 +756,34 @@ def main(): manual_filters = { "Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD", "6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG", - "1 Cluny Way": "12-1-1 Cluny Way-SG15 6ZB", "2 Bromyard Road": "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ", 'Flat 18, 1 Raglan Court': "StonewaterSurveys_13/60-3-18 Raglan Court, 1 Raglan Court-MK41 8QT", '14 Raglan Court, 1 Devizes Avenue': 'StonewaterSurveys_12/55-3-14 Raglan Court, Devizes Avenue-MK41 8QT', '19 South Road': 'StonewaterSurveys_4/19 The Oaks, South Road, SMETHWICK, B67 7BY', 'Flat 12 Pelican Lane': 'StonewaterSurveys_1/121-3-Flat 12 Lynton Court, Pelican Lane-RG14 1NN', 'Flat C, 44 St Leonards Avenue': 'StonewaterSurveys_11/427-2-44c St. Leonards Avenue-MK42 0RB', + '16 The Crescent, Kington': 'StonewaterSurveys_9/360-3-16 The Crescent-HR5 3AS', + '2 School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA', + '14 South Road': 'StonewaterSurveys_2/14 The Oaks, South Road, SMETHWICK, B67 7BY', + '1 Groves Street': 'StonewaterSurveys_4/19-5-1 Groves Street-SN2 2BW', + # '2 Sorrell Place': '', + # '72 St Ives Road': '', + # '1 The Close, Burton Gardens': '', + # '102 Cheaton Close': '', + # 'Flat 16 Spring Gardens': '', + # '4 Apple Close': '', + '25 Folly Lane': '', + } # We now match this retrofit packages board to the extracted data matching_lookup = [] for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)): # Handle the case that has the wrong postcode in the asset data - if home["Address ID"] == 6111566: - blah - # 6118117, 6118744, 6117091 if home["Name"] in manual_filters: filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy() else: filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()].copy() - filtered["survey_folder"].values # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains( @@ -836,14 +843,11 @@ def main(): print(len(missing_ids)) if missing_ids: # We check that the missing ids have no data yet - missing_data = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)][ - ["Name", "Address ID", "Archetype ID"]] - extracted_data[extracted_data["survey_folder"].str.contains("23 Monmouth")]["survey_folder"].values + if len(missing_ids) != 8: + raise Exception("Unacceptable number of missings") - matching_lookup[matching_lookup["survey_folder"].str.contains("23 Monmouth")] - - if matching_lookup["Osm. ID"].duplicated().sum(): - raise Exception("Duplicate Osm. IDs") + if matching_lookup["Address ID"].duplicated().sum(): + raise Exception("Duplicate Address IDs") if matching_lookup["survey_folder"].duplicated().sum(): raise Exception("Duplicate survey folders") @@ -865,20 +869,21 @@ def main(): ] # We should end up with a 1:1 mapping between the Osm. ID and the survey folder - stonewater_data = extracted_data.merge(matching_lookup, on="survey_folder", how="left").merge( + stonewater_data = extracted_data.merge(matching_lookup, on="survey_folder", how="inner").merge( retrofit_packages_board[ [ "Name", + "RA", "Address ID", "Archetype ID", - "Arch. Group Rank", "Archetype Representative", + "Arch. Group Rank", "Actual SAP Band", "Actual SAP Rating", "Modelled SAP Band", "Modelled SAP Rating", ] + measure_columns ], - on=["Osm. ID", "Name"], + on=["Address ID", "Name"], how="left" ) @@ -900,9 +905,13 @@ def main(): header=12 ) + windows_data = windows_data[windows_data["Address ID"] != "Address ID"] + windows_data = windows_data[~pd.isnull(windows_data["Address ID"])] + # We get a lookup id of Osm.ID and when the windows were fitted windows_data = windows_data[ - ["Osm. ID", "Window attributes - Fitted/renewed date", "Parent Asset Window attributes - Fitted/renewed date"] + ["Address ID", "Window attributes - Fitted/renewed date", + "Parent Asset Window attributes - Fitted/renewed date"] ] # Convert to string for the moment windows_data["Parent Asset Window attributes - Fitted/renewed date"] = windows_data[ @@ -921,7 +930,8 @@ def main(): "Fitted/renewed date"]).dt.days / 365 stonewater_data["Package Includes Windows"] = ~pd.isnull(stonewater_data["Window Upgrade"]) - stonewater_data = stonewater_data.merge(windows_data, on="Osm. ID", how="left") + windows_data["Address ID"] = windows_data["Address ID"].astype(float) + stonewater_data = stonewater_data.merge(windows_data, on="Address ID", how="left") # if __name__ == "__main__": # main()