diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 2654fae5..53279eed 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -720,15 +720,22 @@ def main(): extracted_data["Current SAP Rating"] = extracted_data["Current SAP Rating"].astype(int) extracted_data["Current EPC Band"] = extracted_data["Current SAP Rating"].apply(sap_to_epc) - # TODO: Clean up SAP and extract EPC # TODO: RIR floor area!!! # Remove some definite duplicates + dupes = extracted_data[extracted_data["Address"].duplicated()]["Address"] + dupes = extracted_data[extracted_data["Address"].isin(dupes)] + dupes = dupes.sort_values("Address") + # Get all of the folders that end with ROSS + to_drop = dupes[dupes["survey_folder"].str.endswith("ROSS")]["survey_folder"].unique().tolist() + extracted_data = extracted_data[ ~extracted_data["survey_folder"].isin( [ "StonewaterSurveys_10/4 Beech Road, LUTON, LU1 1DP ROSS", - ] + "StonewaterSurveys_2/135 Runley Road, LUTON, LU1 1TX ROSS", + "StonewaterSurveys_13/7 Saxon Road, LUTON, LU3 1JR ROSS" + ] + to_drop ) ] @@ -740,8 +747,15 @@ def main(): retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])] # We now match this retrofit packages board to the extracted data matching_lookup = [] - for _, home in retrofit_packages_board.iterrows(): - filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()].copy() + for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)): + + # Handle the case that has the wrong postcode in the asset data + if home["Name"] == "Flat 21 Walmer Street": + filtered = extracted_data[ + extracted_data["survey_folder"] == "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD" + ].copy() + else: + filtered = extracted_data[extracted_data["Postcode"].str.lower() == home["Postcode"].lower()].copy() # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces filtered = filtered[filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains( @@ -749,7 +763,6 @@ def main(): )] if filtered.empty: - print("Check this once we have full data") continue if filtered.shape[0] == 1: @@ -766,18 +779,20 @@ def main(): filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)] # We have an edge case wher some properties have two outputs in Sharepoint if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ": - bl1h2 - filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"] + raise Exception("Fix me1") + # filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"] if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB': - blah1 - filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"] + raise Exception("Fix me2") + # filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"] + + if home["Name"] == '2 Bromyard Road' and home["Postcode"] == 'WR15 8BZ': + filtered = filtered[filtered["survey_folder"] == "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ"] if filtered.empty: - print("Check this once we have full data2!!!") continue if filtered.shape[0] != 1: - raise Exception("somethign went wrong2") + raise Exception("something went wrong") matching_lookup.append( { @@ -788,6 +803,9 @@ def main(): ) matching_lookup = pd.DataFrame(matching_lookup) + # Find Osmosis IDs that are in the packages board but not in the matching looking + # missing_osm_ids = set(retrofit_packages_board["Osm. ID"]) - set(matching_lookup["Osm. ID"]) + # missing_osm_ids = list(missing_osm_ids) if matching_lookup["Osm. ID"].duplicated().sum(): raise Exception("Duplicate Osm. IDs")