diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 430e5ff7..9a959956 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -3658,19 +3658,47 @@ def patch_cleaned(cleaned): def calculate_eco4_post_ciga( eligiblity_counts, input_data, ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate, - eco4_rate + eco4_rate, archetype_conversion_rate ): remaining_needing_ciga_check = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"] == "eco4 (subject to ciga)" + eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") & + ~eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype") ]["count"].sum() + remaining_needing_ciga_and_archetype_check = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") & + eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype") + ]["count"].sum() + # We scale this down by the archetype_conversion_rate, and add this on to the remaining_needing_ciga_check + remaining_needing_ciga_and_archetype_check_passed = np.round( + remaining_needing_ciga_and_archetype_check * archetype_conversion_rate + ) + + remaining_needing_ciga_check += remaining_needing_ciga_and_archetype_check_passed + + eco4_no_ciga_needed = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4" + ]["count"].sum() + + eco4_no_ciga_archetype_needed = eligiblity_counts[ + eligiblity_counts["ECO Eligibility"] == "eco4 (subject to archetype)" + ]["count"].sum() + eco4_no_ciga_archetype_needed_passed = np.round( + eco4_no_ciga_archetype_needed * archetype_conversion_rate + ) + + eco4_no_ciga_needed += eco4_no_ciga_archetype_needed_passed + + failed_archetype_check = int( + remaining_needing_ciga_and_archetype_check + + eco4_no_ciga_archetype_needed - + remaining_needing_ciga_and_archetype_check_passed - + eco4_no_ciga_archetype_needed_passed + ) + 
has_ciga_check = not input_data["ciga_list"].empty if has_ciga_check: - eco4_no_ciga_needed = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"] == "eco4" - ]["count"].sum() - eco4_ciga_passed = eligiblity_counts[ eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga" ]["count"].sum() @@ -3681,8 +3709,10 @@ def calculate_eco4_post_ciga( eco4_no_ciga_needed_or_ciga_passed = eco4_no_ciga_needed + eco4_ciga_passed - eco4_confirmed = (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate) - eco4_confirmed = np.round(eco4_confirmed) + eco4_confirmed = np.round( + (eco4_no_ciga_needed * ha_eco4_to_sale_rate) + + (eco4_ciga_passed * ha_ciga_pass_to_sale_rate) + ) eco4_no_ciga_needed_cancellations = int(eco4_no_ciga_needed_or_ciga_passed - eco4_confirmed) @@ -3704,9 +3734,7 @@ def calculate_eco4_post_ciga( eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations else: - eco4_no_ciga_needed = eligiblity_counts[ - eligiblity_counts["ECO Eligibility"] == "eco4" - ]["count"].sum() + eco4_confirmed_ciga_failures = 0 # Multiply by sale conversion eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate) @@ -3735,6 +3763,9 @@ def calculate_eco4_post_ciga( "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate, "Of which confirmed - £": eco4_confirmed * eco4_rate, "Of which forecast - £": eco4_remaining_forecast * eco4_rate, + # Archetype check failures + "Estimated total - failed archetype check - #": failed_archetype_check, + "Estimated total - failed archetype check - £": failed_archetype_check * eco4_rate, # Ciga failures "Estimated total - failed CIGA": int(eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures), "Confirmed CIGA failures": eco4_confirmed_ciga_failures, @@ -3766,6 +3797,14 @@ def forecast_remaining_sales(loader): gbis_rate = 600 eco4_rate = 1710 + # Based on ONS https://www.ons.gov.uk/peoplepopulationandcommunity/housing/bulletins/housingenglandandwales + # 
/census2021 + # there are 5.7 million terraced properties in England and Wales, of the 19.3 million houses or bungalows. We therefore apply + # a 30% discount to homes that are dependent on an archetype check, since around 30% of them will be mid terraced + # This 30% is slightly harsh, but we prefer to be conservative + # Therefore, the archetype check conversion rate is 70% + archetype_conversion_rate = 0.7 + # 1) Calculate the conversion rate from passed CIGA to actual sale converted_ciga_jobs = [] for ha_name, input_data in loader.data.items(): @@ -4010,13 +4049,27 @@ def forecast_remaining_sales(loader): eco4_pre_ciga = eligiblity_counts[ eligiblity_counts["ECO Eligibility"].isin( - ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + [ + "eco4", + "eco4 (subject to ciga)", + "eco4 - passed ciga", + "failed ciga", + "eco4 (subject to ciga) (subject to archetype)", + "eco4 (subject to archetype)" + ] ) ]["count"].sum() eco4_pre_ciga_remaining = eligiblity_counts_remaining[ eligiblity_counts_remaining["ECO Eligibility"].isin( - ["eco4", "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"] + [ + "eco4", + "eco4 (subject to ciga)", + "eco4 - passed ciga", + "failed ciga", + "eco4 (subject to ciga) (subject to archetype)", + "eco4 (subject to archetype)" + ] ) ]["count"].sum() @@ -4065,7 +4118,8 @@ def forecast_remaining_sales(loader): ha_ciga_conversion_rate=ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate=ha_eco4_to_sale_rate, - eco4_rate=eco4_rate + eco4_rate=eco4_rate, + archetype_conversion_rate=archetype_conversion_rate ) eco4_post_ciga_remaining_results = calculate_eco4_post_ciga( @@ -4074,7 +4128,8 @@ def forecast_remaining_sales(loader): ha_ciga_conversion_rate=ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate=ha_eco4_to_sale_rate, - eco4_rate=eco4_rate + eco4_rate=eco4_rate, + archetype_conversion_rate=archetype_conversion_rate ) # Calculate the 
delta compared to Warmfront's original remaining @@ -4111,6 +4166,8 @@ def forecast_remaining_sales(loader): gbis_remaining = int(np.round(gbis_remaining * ha_gbis_sale_conversion)) gbis_remaining_revenue = int(gbis_remaining * gbis_rate) + survey_list["installation_status"].value_counts() + # GBIS delta if original_warmfront_remaining_gbis == 0: gbis_delta_vs_original_estimate_remaining = "N/A" @@ -4176,7 +4233,7 @@ def forecast_remaining_sales(loader): surveys_with_eligibility["installation_status"] == "GBIS - cancelled" ].shape[0] - expected_gbis_unconfirmed_sales = incomplete_gbis_sales * ha_gbis_sale_conversion + expected_gbis_unconfirmed_sales = np.round(incomplete_gbis_sales * ha_gbis_sale_conversion) gbis_expected_cancellations = int(incomplete_gbis_sales - expected_gbis_unconfirmed_sales) @@ -4187,10 +4244,12 @@ def forecast_remaining_sales(loader): # Add in the variance: # We should expect that the pre-ciga total is: # 1) The number of post CIGA successes + + # 2) The number of archetype failures + # 2) the number of CIGA failures + # 3) The number of cancellations variance_total = eco4_pre_ciga - ( eco4_post_ciga_total_results["ECO4 - post CIGA - #"] + + eco4_post_ciga_total_results["Estimated total - failed archetype check - #"] + eco4_post_ciga_total_results['Estimated total - failed CIGA'] + eco4_post_ciga_total_results["Expected cancellations - #"] ) @@ -4199,6 +4258,7 @@ def forecast_remaining_sales(loader): variance_remaining = eco4_pre_ciga_remaining - ( eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] + + eco4_post_ciga_remaining_results["Estimated total - failed archetype check - #"] + eco4_post_ciga_remaining_results['Estimated total - failed CIGA'] + eco4_post_ciga_remaining_results["Expected cancellations - #"] ) @@ -4290,6 +4350,11 @@ def forecast_remaining_sales(loader): ("ECO4 Cancellations", "", "Of which expected cancellations - £", ""): eco4_post_ciga_remaining_results[ "Expected cancellations - £" ], + # Archetype check 
failures + ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - #", ""): + eco4_post_ciga_remaining_results['Estimated total - failed archetype check - #'], + ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - £", ""): + eco4_post_ciga_remaining_results['Estimated total - failed archetype check - £'], # CIGA failures ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[ 'Estimated total - failed CIGA' @@ -4324,7 +4389,7 @@ def forecast_remaining_sales(loader): } # Make sure nothing is forgotten due to duplicate multi-index keys - if len(to_append) != 47: + if len(to_append) != 49: raise ValueError("Something went wrong") results.append(to_append)