From e2055b3b7dde7a1b001a568c23bb3016fbfa4079 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 19:34:43 +0000 Subject: [PATCH] fixed variance for HA6 --- .../ha_15_32/ha_analysis_batch_3.py | 135 +++++++++++++++++- 1 file changed, 129 insertions(+), 6 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 09b0910e..8c9f59c2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -730,6 +730,81 @@ class DataLoader: "Post Code" ] = "ST5 7BY" + # PERFORM ADDITIONAL DROPS + # Dropping rows based on multiple conditions + conditions_to_drop = [ + (survey_list['Street / Block Name'] == "Bedford Crescent") & (survey_list['Post Code'] == "ST5 3EH") & ( + survey_list['NO.'] == 23) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "Hereford Avenue") & (survey_list['Post Code'] == "ST5 3EJ") & ( + survey_list['NO.'] == 92) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "Seabridge Lane") & (survey_list['Post Code'] == "ST5 3EX") & ( + survey_list['NO.'].isin([16, 18, 42])) & ( + survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "ESKDALE PLACE") & (survey_list['Post Code'] == "ST5 3QW") & ( + survey_list['NO.'] == 5) & (survey_list['SUBMISSION DATE'].astype(str) == "2023-03-06 00:00:00"), + (survey_list['Street / Block Name'] == "Birch House road") & (survey_list['Post Code'] == "ST6 2LS") & ( + survey_list['NO.'].isin([56, 58])), + (survey_list['Street / Block Name'] == "Blackthorn Place") & (survey_list['Post Code'] == "ST6 2LS") & ( + survey_list['NO.'].isin([37, 39])), + (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 7BT") & ( + survey_list['NO.'].isin([17, 6])), + (survey_list['Street / Block Name'] == "Lion Grove") & (survey_list['Post Code'] == "ST5 7HQ") & ( + survey_list['NO.'].isin([10, 12])) & ( + survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "DENRY CRESCENT") & (survey_list['Post Code'] == "ST5 8JW") & ( + survey_list['NO.'] == 87) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")), + (survey_list['Street / Block Name'] == "HOLLINS CRESCENT") & (survey_list['Post Code'] == "ST7 1JW") & ( + survey_list['NO.'] == 19) + ] + + # Combine all conditions with an OR "|" + combined_condition = np.logical_or.reduce(conditions_to_drop) + + # Drop rows that meet the combined condition + survey_list = survey_list[~combined_condition] + + # Making replacements using np.where + survey_list['Post Code'] = np.where( + (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3EH") & ( + survey_list['NO.'] == 17), + "ST5 7BT", + survey_list['Post Code'] + ) + + survey_list['Post Code'] = np.where( + (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3ED") & ( + survey_list['NO.'] == 6), + "ST5 7BT", + survey_list['Post Code'] + ) + + # Maple avenue (stoke on trent, not newcastle) should be st7 1jw + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"].str.lower().str.contains("maple avenue")) & ( + survey_list["Post Code"].str.lower() == "st7 1jx" + ), + "st7 1jw", + survey_list["Post Code"] + ) + + # Hollins Crescent should be st7 1jx + survey_list["Post Code"] = np.where( + (survey_list["Street / Block Name"].str.lower().str.contains("hollins crescent")) & ( + survey_list["Post Code"].str.lower() == "st7 1jw" + ), + "st7 1jx", + survey_list["Post Code"] + ) + + # Additional drops as the above misses some: + survey_list = survey_list[ + ~((survey_list["NO."].astype(str).isin(["18", "42"])) & + (survey_list["Street / Block Name"] == "Seabridge Lane") & + (survey_list["Post Code"] == "ST5 3EY") & + (survey_list["SUBMISSION DATE"].astype(str) == "24.07.2023") & + (survey_list["INSTALLED OR CANCELLED"].str.contains("NO UPDATE YET"))) + ] + return survey_list @staticmethod @@ -1176,6 +1251,11 @@ class DataLoader: if matching_lookup.shape[0] != survey_list.shape[0]: raise ValueError("Mismatch in the number of survey rows and matching lookup rows") + matching_lookup = matching_lookup[~pd.isnull(matching_lookup["asset_list_row_id"])] + + if matching_lookup["asset_list_row_id"].duplicated().sum(): + raise ValueError("Duplicated matches in survey list") + # Merge onto the survey list survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id") @@ -1483,7 +1563,7 @@ class DataLoader: # TODO: We might have more indications of partial cancellations survey_list["installation_status"] = np.where( survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]), - "partially cancelled", + "cancelled", survey_list["installation_status"] ) else: @@ -3174,6 +3254,8 @@ def forecast_remaining_sales(loader): if survey_list.empty: asset_list_remaining = asset_list.copy() else: + # For HA6, there are a small number of postcodes that do not match to any item in the asset list + survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])] asset_list_remaining = asset_list.merge( survey_list[["asset_list_row_id", "installation_status"]], how="left", @@ -3183,6 +3265,47 @@ def forecast_remaining_sales(loader): asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])] asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"]) + # # TODO: TEMP + # n_pre_ciga = asset_list[ + # asset_list["ECO Eligibility"].isin( + # [ + # "eco4 - passed ciga", + # "eco4 (subject to ciga)", + # "failed ciga", + # "eco4" + # ] + # ) + # ].shape[0] + # + # n_pre_ciga_remaining = asset_list_remaining[ + # asset_list_remaining["ECO Eligibility"].isin( + # [ + # "eco4 - passed ciga", + # "eco4 (subject to ciga)", + # "failed ciga", + # "eco4" + # ] + # ) + # ].shape[0] + # + # compare_to_ids = asset_list_remaining["asset_list_row_id"].values + # assets_diff_ids = [x for x in asset_list["asset_list_row_id"].values if x not in compare_to_ids] + # diff = asset_list[asset_list["asset_list_row_id"].isin(assets_diff_ids)] + # + # n_sold = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0] + # # cancellations = survey_list[] + # asset_list["ECO Eligibility"].value_counts() + # + # # Revenenue + # pre_ciga_revenue = n_pre_ciga * eco4_rate + # pre_ciga_remaining_revenue = n_pre_ciga_remaining * eco4_rate + # sold_revenue = n_sold * eco4_rate + # + # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue) + # # MISSING 1 SALE from sold + # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0] + # # TODO: END TEMP + eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index() eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index() @@ -3402,13 +3525,13 @@ def forecast_remaining_sales(loader): ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga, ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining, ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue, + ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, ("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total, ("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining, ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold, - ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations, + ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate, # This is for jobs that are in-progress and could still cancel - ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations, - ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue, + ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations * eco4_rate, # ECO4 - asset list, post ciga, total ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"): eco4_post_ciga_total_results[ @@ -3460,9 +3583,9 @@ def forecast_remaining_sales(loader): ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total, ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue, ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold, - ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations, + ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate, # This is for jobs that are in-progress and could still cancel - ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations, + ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate, ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining, ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue, ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):