From db7b6de87bfb13486a179cbdc547ae375cfc0c8d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 10 Mar 2024 14:13:20 +0000 Subject: [PATCH] handle HA56 dupes --- etl/eligibility/ha_15_32/ha_analysis_batch_3.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 064ff8f5..62099386 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -189,6 +189,7 @@ class DataLoader: "HA25": 154, "HA41": 26, "HA50": 5, + "HA56": 320, "HA63": 0, "HA117": 4 } @@ -693,6 +694,8 @@ class DataLoader: asset_list["ECO Eligibility"] ) + return asset_list + @staticmethod def correct_ha14_asset_list(asset_list): @@ -2040,6 +2043,14 @@ class DataLoader: "Ls63nl", "LS6 3NL" ) + # Handle a duplicate + eco3_list = eco3_list[ + ~((eco3_list["Street / Block Name"] == "Mount Pleasant") & + (eco3_list["Post Code"] == "CW1 3JF") & + (eco3_list["NO "] == 5) & + (eco3_list["INSTALL/ CANCELLATION DATE"] == "CANCELLED 20.5.2022")) + ] + return eco3_list def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name): @@ -2128,15 +2139,16 @@ class DataLoader: # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2 # where many surveys were conducted on house numbers, not in the asset list # 154 missed, 2827 matched for HA 25 + # For HA56, the number of missed is high at 320, however a big portion of these are due to the block being + # listed in the asset list, and individual units being in the survey list if len(missed) != self.UNMATCHED_ECO3[ha_name]: raise ValueError( f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched" ) - missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)] matching_lookup = pd.DataFrame(matching_lookup) # Check dupes as this will cause problems later on - if matching_lookup["asset_list_row_id"].duplicated().any(): + if matching_lookup["asset_list_row_id"].duplicated().sum(): raise ValueError("Duplicated asset list row ids") # Merge onto eco3 list