diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 627fcede..2f17ed73 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -51,6 +51,12 @@ PROPERTY_TYPE_LOOKUP = { 'MAISONETTE': "Maisonette", 'HOSTEL': None }, + "HA5": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Bedsit": None + }, "HA6": { "property_type": { 'HOUSE': "House", @@ -161,6 +167,21 @@ PROPERTY_TYPE_LOOKUP = { "Hostel": None, "Block": None, }, + "HA20": { + "House": "House", + "Flat": "Flat", + 'Sheltered Flat': "Flat", + 'Maisonette': 'Maisonette', + 'Bungalow': 'Bungalow', + 'House. SD': 'House', + 'House. MT': 'House', + 'House. ET': 'House', + 'Sheltered Bungalow': 'Bungalow', + 'Guest Accomodation': None, + 'Sheltered House': 'House', + 'House. MT ': 'House', + 'House. D': 'House' + }, "HA24": { '01 HOUSE': 'House', '02 FLAT': 'Flat', @@ -3632,6 +3653,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA2": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type"].strip()) built_form = None + elif ha_name == "HA5": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) + built_form = None elif ha_name == "HA6": property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]] built_form = property_meta["built_form"] @@ -3694,6 +3718,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA19": property_type = property_meta["Dwelling Type"] built_form = None + elif ha_name == "HA20": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) + built_form = None elif ha_name == "HA21": property_description = property_meta["Property Type"].strip().lower() if "house" in property_description: @@ -5775,6 +5802,7 @@ def forecast_remaining_sales(loader): results.append(to_append) 
results = pd.DataFrame(results) + results.to_csv("pipeline_remaining_raw.csv") totals_row = {} for col in results.columns: @@ -6006,9 +6034,7 @@ def fml_data_pull(loader): "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', - # NEW - add property type - "HA17" + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20", ] # Can't pull from EPC database because it's based in Scotland @@ -6105,7 +6131,7 @@ def fml_analysis(loader): "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52' + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20", ] no_ciga_cavity_descriptions = [ @@ -6124,22 +6150,6 @@ def fml_analysis(loader): # TODO: There will be some properties that are subject to CIGA that do not look like they ned a CIGA check! pass # them! 
Non-invasices will have checked the wall though - codes = [ - "HA39", "HA14", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA7", - "HA16", "HA107", "HA25", "HA50", "HA41", "HA48", "HA2", "HA63", "HA12", - "HA117", "HA13", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", - "HA30", "HA31", "HA54", "HAXX", "HA49", "HAXXX", - ] - - values = [ - 706, 2161, 1053, 793, 0, 656, 1200, 1647, 4248, 2703, 1087, 1876, 2135, - 1078, 775, 538, 518, 401, 466, 2627, 98, 1050, 524, 191, 538, 384, 204, - 281, 422, 74, 313, 71, 6 - ] - - # Create a dictionary mapping - remaining_eligible_mapping = dict(zip(codes, values)) - results = [] wall_descriptions = [] for ha_name in tqdm(has_bruh): @@ -6397,9 +6407,13 @@ def fml_analysis(loader): without_survey_without_ciga_expected = 0 else: # We apply the same conversion rate as the properties with a survey - without_survey_without_ciga_expected = np.round( - without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0]) - ) + + if ciga_check_needed.shape[0] == 0 and ciga_check_expectation == 0: + without_survey_without_ciga_expected = without_survey_needing_ciga.shape[0] + else: + without_survey_without_ciga_expected = np.round( + without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0]) + ) without_survey_passed_ciga = fuck_this[ (fuck_this["estimated"] == True) & @@ -6466,15 +6480,6 @@ def fml_analysis(loader): without_survey_identified_as_gbis_qualified ) - surveys = loader.data[ha_name]["survey_list"] - sold_now = 0 - if not surveys.empty: - sold_now = surveys[ - surveys["installation_status"].str.lower().str.contains("eco4") - ].shape[0] - - sales_since_nov = sold_now - original_figures["No. 
def _remaining_volumes(frame, ha_col, eco4_col, gbis_col, label):
    """
    Build one "remaining volumes" section of the final report.

    Selects the HA name, ECO4 and GBIS columns from ``frame``, renames them to
    ``"HA Name"``, ``"# ECO4 remaining - <label>"`` and
    ``"# GBIS remaining - <label>"``, adds ``"# Total remaining - <label>"`` as
    their sum and returns the result sorted by ``"HA Name"``.

    :param frame: DataFrame holding the source columns
    :param ha_col: name of the column in ``frame`` holding the HA name
    :param eco4_col: name of the column in ``frame`` holding the ECO4 volume
    :param gbis_col: name of the column in ``frame`` holding the GBIS volume
    :param label: suffix identifying the estimate, e.g. "All HA Summary"
    :return: a new DataFrame with the four renamed/derived columns
    """
    eco4_name = f"# ECO4 remaining - {label}"
    gbis_name = f"# GBIS remaining - {label}"

    section = frame[[ha_col, eco4_col, gbis_col]].copy().rename(
        columns={ha_col: "HA Name", eco4_col: eco4_name, gbis_col: gbis_name}
    )
    section[f"# Total remaining - {label}"] = section[eco4_name] + section[gbis_name]
    return section.sort_values("HA Name")


def create_final_report(
    *,
    epc_results_path="analysis - revised.csv",
    pipeline_results_path="pipeline_remaining_raw.csv",
    volumes_output_path="HA Analysis Final - volumes.csv",
    revenue_output_path="HA Analysis Final - revenue.csv",
    eco4_unit_revenue=1710,
    gbis_unit_revenue=600,
    expected_epc_exceptions=("HA17", "HA32"),
):
    """
    This function will produce the final output for the HA analysis.

    It reads the EPC-validated results and the raw pipeline results from CSV,
    builds four per-HA estimates of the remaining ECO4/GBIS volumes (All HA
    Summary, postcode list pre-CIGA, postcode list post-CIGA, EPC database),
    joins them on "HA Name", derives a revenue table by multiplying the
    volumes with the per-unit revenues, sanity-checks the volumes and finally
    writes both tables to CSV. Nothing is written if a sanity check fails.

    All parameters are keyword-only and default to the values that were
    previously hard-coded, so ``create_final_report()`` behaves as before.

    :param epc_results_path: CSV with the columns "HA Name",
        "EPC verified ECO4 Eligible - Remaining" and
        "EPC verified GBIS Eligibile - Remaining"
    :param pipeline_results_path: CSV with the stringified tuple column names
        selected below (e.g. "('', '', '', 'HA Name')")
    :param volumes_output_path: where the volumes table is written
    :param revenue_output_path: where the revenue table is written
    :param eco4_unit_revenue: multiplier applied to every ECO4 volume column
    :param gbis_unit_revenue: multiplier applied to every GBIS volume column
    :param expected_epc_exceptions: HA names that are expected to have a
        larger EPC-database ECO4 volume than the post-CIGA postcode-list one
    :return: None
    :raises ValueError: if one of the sanity checks on the volumes fails
    """
    epc_validated_results = pd.read_csv(epc_results_path)
    pipeline_results = pd.read_csv(pipeline_results_path)

    # The pipeline CSV has stringified tuples as column names
    pipeline_ha_col = "('', '', '', 'HA Name')"
    # The postcode-list GBIS figure is the same column pre- and post-CIGA
    gbis_postcode_col = (
        "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')"
    )

    ####################################
    # Original Warmfront estimates
    ####################################
    all_ha_summary_remaining = _remaining_volumes(
        pipeline_results,
        pipeline_ha_col,
        "('ECO4 original', '', 'Remaining - #', '')",
        "('GBIS original', '', 'Remaining - #', '')",
        "All HA Summary",
    )

    ####################################
    # Postcode list - pre-CIGA
    ####################################
    postcode_list_pre_ciga_remaining = _remaining_volumes(
        pipeline_results,
        pipeline_ha_col,
        "('ECO4 pre-ciga', '', 'Remaining - #', '')",
        gbis_postcode_col,
        "Postcode list (pre CIGA)",
    )

    ####################################
    # Postcode list - post-CIGA
    ####################################
    postcode_list_post_ciga_remaining = _remaining_volumes(
        pipeline_results,
        pipeline_ha_col,
        "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')",
        gbis_postcode_col,
        "Postcode list (post CIGA)",
    )

    ####################################
    # From EPC Database
    ####################################
    from_epc_database = _remaining_volumes(
        epc_validated_results,
        "HA Name",
        "EPC verified ECO4 Eligible - Remaining",
        "EPC verified GBIS Eligibile - Remaining",
        "From EPC Database (post CIGA)",
    )

    # Combine the datasets; the final inner join keeps only the HAs that also
    # appear in the EPC-validated results
    volumes = (
        all_ha_summary_remaining
        .merge(postcode_list_pre_ciga_remaining, how="left", on="HA Name")
        .merge(postcode_list_post_ciga_remaining, how="left", on="HA Name")
        .merge(from_epc_database, how="inner", on="HA Name")
    )

    ####################################
    # Sanity checks on the volumes
    ####################################
    eco4_pre = volumes["# ECO4 remaining - Postcode list (pre CIGA)"]
    eco4_post = volumes["# ECO4 remaining - Postcode list (post CIGA)"]
    eco4_epc = volumes["# ECO4 remaining - From EPC Database (post CIGA)"]
    gbis_pre = volumes["# GBIS remaining - Postcode list (pre CIGA)"]
    gbis_post = volumes["# GBIS remaining - Postcode list (post CIGA)"]

    # We check that each column gets smaller
    eco4_shrinks = eco4_pre >= eco4_post
    if not eco4_shrinks.all():
        offenders = sorted(volumes.loc[~eco4_shrinks, "HA Name"].astype(str))
        raise ValueError(
            f"decreasing_check1 failed: ECO4 post-CIGA exceeds pre-CIGA for {offenders}"
        )

    # Just HA32 and HA17 should fail this, and it's due to GBIS jobs looking like ECO4
    epc_above_postcode = set(volumes.loc[eco4_epc > eco4_post, "HA Name"].tolist())
    if epc_above_postcode != set(expected_epc_exceptions):
        raise ValueError(
            "decreasing_check2 failed: expected EPC > postcode list (post CIGA) only for "
            f"{sorted(expected_epc_exceptions)}, got {sorted(map(str, epc_above_postcode))}"
        )

    # Check for GBIS
    gbis_shrinks = gbis_pre >= gbis_post
    if not gbis_shrinks.all():
        offenders = sorted(volumes.loc[~gbis_shrinks, "HA Name"].astype(str))
        raise ValueError(
            f"decreasing_check3 failed: GBIS post-CIGA exceeds pre-CIGA for {offenders}"
        )

    # The equivalent GBIS check against the EPC database is deliberately not
    # performed - it fails for multiple HAs

    ####################################
    # Revenue
    ####################################
    revenue = volumes.copy()
    for label in (
        "All HA Summary",
        "Postcode list (pre CIGA)",
        "Postcode list (post CIGA)",
        "From EPC Database (post CIGA)",
    ):
        eco4_name = f"# ECO4 remaining - {label}"
        gbis_name = f"# GBIS remaining - {label}"
        # Convert the volumes to revenue, then re-calculate the total
        revenue[eco4_name] = revenue[eco4_name] * eco4_unit_revenue
        revenue[gbis_name] = revenue[gbis_name] * gbis_unit_revenue
        revenue[f"# Total remaining - {label}"] = revenue[eco4_name] + revenue[gbis_name]

    # Replace the # with £ in the columns
    revenue.columns = [col.replace("#", "£") for col in revenue.columns]

    # Store final outputs
    volumes.to_csv(volumes_output_path)
    revenue.to_csv(revenue_output_path)