@staticmethod
def correct_ha56_asset_list(asset_list):
    """Flag HA56 postcodes that must not be treated as ECO eligible.

    CH1 4JR has already been surveyed, but the asset list carries it as a
    single row when it is actually 32 units, so it is simply marked as
    ineligible. The same applies to CW8 3EU. The remaining postcodes are
    overridden in exactly the same way.

    Args:
        asset_list: HA56 asset list with "Post Code" and "ECO Eligibility"
            columns. The "ECO Eligibility" column is overwritten in place.

    Returns:
        The same ``asset_list`` object, with "ECO Eligibility" set to
        "Not eligible" on every row whose "Post Code" is in the override
        list. Returning the frame keeps this function usable by callers
        that rebind the result, like the sibling correction functions.
    """
    # Exact-match postcodes, as they are written in the asset list.
    ineligible_postcodes = [
        "CH1 4JR",
        "CW8 3EU",
        "CW1 3HP",
        "WA4 2PH",
        "BD6 1QJ",
        "L39 1RS",
        "WA10 2DE",
    ]

    # One vectorised pass instead of one np.where per postcode.
    asset_list["ECO Eligibility"] = np.where(
        asset_list["Post Code"].isin(ineligible_postcodes),
        "Not eligible",
        asset_list["ECO Eligibility"],
    )

    return asset_list


@staticmethod
def correct_ha56_eco3_list(eco3_list):
    """Clean the HA56 ECO3 list so that it lines up with the asset list.

    Rows without a postcode are dropped, two street / block names are
    rewritten and one malformed postcode is repaired.

    Args:
        eco3_list: HA56 ECO3 list with "Post Code" and
            "Street / Block Name" columns.

    Returns:
        A new DataFrame containing only the rows that have a postcode,
        with the corrections applied. The input frame is left untouched.
    """
    # Copy after filtering: the column assignments below must write to an
    # independent frame, not to a slice of the caller's frame (this is what
    # triggers pandas' SettingWithCopyWarning).
    eco3_list = eco3_list[eco3_list["Post Code"].notna()].copy()

    street_corrections = {
        "Mount Pleasant, Crewe": "Mount Pleasant",
        "Dutton Close": "Dutton Way",
    }
    for wrong_name, right_name in street_corrections.items():
        # regex=False: these are literal substrings, and the default value
        # of `regex` is not the same across pandas versions.
        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
            wrong_name, right_name, regex=False
        )

    eco3_list["Post Code"] = eco3_list["Post Code"].str.replace(
        "Ls63nl", "LS6 3NL", regex=False
    )

    return eco3_list
+3969,9 @@ def calculate_eco4_post_ciga( def forecast_remaining_sales(loader): + # TODO: Skip HA34 for the moment + loader.data = {k: v for k, v in loader.data.items() if k != "HA34"} + # Assumptions: # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate # and I don't want the numbers to change too much, depenent on the CIGA conversation rate @@ -4523,9 +4599,9 @@ def forecast_remaining_sales(loader): gbis_delta_vs_original_estimate_remaining, # Expected cancellations ( - "GBIS Postcode list", "Of which expected sales - £", "Remaining - £", + "GBIS Postcode list", "", "Of which expected sales - £ - £", "GBIS total"): gbis_remaining_revenue, - ("GBIS Postcode list", "Of which expected cancellations -£", "Remaining - £", "GBIS total"): + ("GBIS Postcode list", "", "Of which expected cancellations -£", "GBIS total"): gbis_remaining_expected_cancellations_revenue } @@ -4786,14 +4862,14 @@ def app(): priority_has = [ "HA1", "HA2", "HA6", "HA7", "HA12", "HA13", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", # "HA34", - "HA35", "HA39", "HA41", "HA48", "HA50", "HA63", "HA107", "HA117" + "HA35", "HA39", "HA41", "HA48", "HA50", "HA56", "HA63", "HA107", "HA117" ] # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], # 35 [DONE] - # [WIP] + # 56 [WIP] # Consider for ECO4: - # Consider for GBIS: 56 + # Consider for GBIS: # 34 [bug in the results so leaving out for the moment] # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in