diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index fdc00876..d75a9f34 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -42,7 +42,7 @@ class DataLoader: # We expect 4 unmatched addresses, which have been validated manually as being in the ciga file but not # the asset list "HA14": 4, - # There's just too many unmatched here - if we identify some homes that + # There's just too many unmatched here "HA6": 117, "HA107": 52 } @@ -786,6 +786,8 @@ class DataLoader: survey_list = data_assets["survey_list"].copy() ciga_list = data_assets["ciga_list"].copy() + asset_list_starting_size = asset_list.shape[0] + # Change the column name if it's ECO eligibility asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"}) # Remove surplus whitespace from the ECO Eligibility column @@ -793,19 +795,17 @@ class DataLoader: # Push to lower case asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.lower() # Remap - asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].map(eco_eligibility_map) + asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].replace(eco_eligibility_map) if not ciga_list.empty: # We merge on ciga and update the status to reflect if it has failed ciga or not # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA # check - asset_list = asset_list.merge( - ciga_list[["asset_list_row_id", "Guarantee"]], - how='left', - on="asset_list_row_id" - ) - asset_list["ECO Eligibility"].value_counts() + ciga_list_to_merge = ciga_list[["asset_list_row_id", "Guarantee"]].copy() + ciga_list_to_merge = ciga_list_to_merge[~pd.isnull(ciga_list_to_merge["asset_list_row_id"])] + + asset_list = asset_list.merge(ciga_list_to_merge, how='left', on="asset_list_row_id") asset_list["ECO Eligibility"] = np.where( ( @@ -818,7 +818,10 @@ class DataLoader: # We replace any remaining "Subject to CIGA" with pass Ciga asset_list["ECO Eligibility"] = np.where( - asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False), + ( + asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) & + (asset_list["Guarantee"] == "No") + ), "eco4 - passed ciga", asset_list["ECO Eligibility"] ) @@ -826,6 +829,8 @@ class DataLoader: asset_list = asset_list.drop(columns=["Guarantee"]) # Update the asset list with the categorisations and rename changes + if asset_list.shape[0] != asset_list_starting_size: + raise ValueError("The asset list has changed in size") self.data[ha_name]["asset_list"] = asset_list # Report on sales @@ -846,7 +851,7 @@ class DataLoader: survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip() # Remap the values in the scheme column - survey_list[scheme_column] = survey_list[scheme_column].map(scheme_map) + survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map) survey_list["installation_status"] = None survey_list["installation_status"] = np.where(