From f0c4ca0143ee886ba84960b00e3f2700b6047429 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Apr 2024 11:14:33 +0100 Subject: [PATCH] completed unitas --- .../ha_15_32/ha_analysis_batch_3.py | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 35bb63fe..f99c7b1a 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -7106,9 +7106,53 @@ def unitas_data_prep(loader): continue raise Exception("something went wrong") + outcome_matching = pd.DataFrame(outcome_matching) + + # We can have duplicate matches, so we format the Date letter sent column and retrieve the newest outcome + outcome_matching["Date letters sent"] = outcome_matching["Date letters sent"].str.lower() + outcome_matching["Extracted Date"] = outcome_matching["Date letters sent"].str.extract( + r'(?:w[./]c )(\d{2}\.\d{2}\.\d{4})') + outcome_matching["Extracted Date"] = pd.to_datetime(outcome_matching["Extracted Date"], format='%d.%m.%Y') + # We sort by asset_list_row_id and extracted date, and retrieve the newest + outcome_matching = outcome_matching.sort_values(["asset_list_row_id", "Extracted Date"], ascending=[True, False]) + + # Some properties will have multiple outcomes - for these, we re-format + outcome_matching_grouped = [] + for asset_list_row_id, grouped_data in outcome_matching.groupby("asset_list_row_id"): + if grouped_data.shape[0] == 1: + outcome_matching_grouped.append( + { + "Number of previous visits": 1, + **grouped_data.to_dict("records")[0] + } + ) + continue + if grouped_data.shape[0] == 2: + newest_visit = grouped_data.head(1) + oldest_visit = grouped_data.tail(1)[['Outcomes', 'Surveyor', 'Notes', 'Date letters sent']].add_suffix( + " second visit") + to_append = { + "Number of previous visits": 2, + **newest_visit.to_dict("records")[0], + **oldest_visit.to_dict("records")[0] + } + outcome_matching_grouped.append(to_append) + else: + raise Exception("something went wrong") + + outcome_matching_grouped = pd.DataFrame(outcome_matching_grouped) + + unitas_properties_to_survey_with_outcomes = unitas_properties_to_survey_full.merge( + outcome_matching_grouped, how="left", on="asset_list_row_id" + ) + unitas_properties_to_survey_with_outcomes["Number of previous visits"] = ( + unitas_properties_to_survey_with_outcomes["Number of previous visits"].fillna(0) + ) # Store as an excel - unitas_properties_to_survey_full.to_excel("Unitas - phase 2 properties to Survey.xlsx") + unitas_properties_to_survey_with_outcomes.to_excel("Unitas - phase 2 properties to Survey.xlsx") + + unitas_properties_to_survey_with_outcomes["Last EPC Built Form"].value_counts() def app():