From 0142e6fe5fcbcffc836bc139df48cf31e77545f1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 8 Apr 2024 15:29:52 +0100 Subject: [PATCH] wip matching completed surveys back to the asset list --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- .../ha_15_32/ha_analysis_batch_3.py | 78 +++++++++++++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index b4b82d0b..de2c0e6a 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -6907,3 +6907,81 @@ def app(): december_figures["ECO4 remaining"] ) december_figures["ECO4 remaining"].sum() + + # Adhoc - for UNITAS, stripping out additional surveys that have been completed + unitas_data = loader.data["HA50"].copy() + unitas_asset_list = unitas_data["asset_list"].copy() + unitas_survey_sheet = unitas_data["survey_list"].copy() + # We remove the surveyed properties from the asset sheet + unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])] + unitas_asset_list = unitas_asset_list.merge( + unitas_survey_sheet[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])] + unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"]) + + # We read in the data for the further completed surveys + unitas_phase_1_workbook = openpyxl.load_workbook( + "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx" + ) + phase_1_worksheet = 
unitas_phase_1_workbook["ECO 4 - PHASE 1"] + phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"] + phase1_colnames = [cell.value for cell in phase_1_worksheet[1]] + phase_1_rows_data = [] + for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + phase_1_rows_data.append(row_data) + + phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames) + + # Correct phase 1 surveys in the same fashion as the previous approach + phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy()) + + # We check all phase 1 surveys are contained in the data we had before + additional = [] + for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)): + # We look for the entry in the old survey sheet: + # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]] + # if matched_uprn.shape[0] == 1: + # continue + + matched_1 = unitas_survey_sheet[ + (unitas_survey_sheet["Post Code"] == row["Post Code"]) & + (unitas_survey_sheet["NO."] == row["NO."]) + ] + + if matched_1.shape[0] == 1: + continue + + matched_2 = unitas_survey_sheet[ + (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) & + (unitas_survey_sheet["NO."] == row["NO."]) + ] + + if matched_2.shape[0] == 1: + continue + + additional.append(row.to_dict()) + additional = pd.DataFrame(additional) + + phase_2_rows_data = [] + for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + phase_2_rows_data.append(row_data) + + phase2_colnames = [cell.value for cell in phase_2_worksheet[1]] + phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames) + # Drop all of the occurrences of "OFFICE USE ONLY" columns + phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c]) + 
common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns}) + additional_filtered = additional[common_columns] + + further_unitas_completed_surveys = pd.concat( + [phase_2_surveys, additional_filtered], + axis=0, + ignore_index=True + ) + + # We match these back to the asset list