From 724379a86d1bd9b79159f2f8f9e5d8abe9496f5f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 26 Mar 2024 18:05:08 +0000 Subject: [PATCH] wrapping up ha analysis --- .../ha_15_32/ha_analysis_batch_3.py | 170 ++++++++++-------- 1 file changed, 94 insertions(+), 76 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 2f17ed73..e414cd00 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -5366,6 +5366,7 @@ def forecast_remaining_sales(loader): results = [] for ha_name, input_data in loader.data.items(): + # Original warmfront figures - ECO4 original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name] if original_warmfront_estimates.empty: @@ -6032,7 +6033,7 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): has_bruh = [ "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", - "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", + "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20", ] @@ -6129,7 +6130,7 @@ def fml_analysis(loader): assumed_ciga_pass_rate = 0.731 has_bruh = [ "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", - "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", + "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20", ] @@ -6738,89 +6739,106 @@ def app(): loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs) loader.load() loader.ha_facts_and_figures() - forecast_remaining_sales(loader) - conversion_rate = 0.95 - archetype_check_conversion = 0.7 - res = [] - for k, v in loader.data.items(): - asset_list = v["asset_list"].copy() - agg = asset_list["ECO Eligibility"].value_counts() - # We find a case where there are properties that have passed CIGA - if not any("passed" in x for x in agg.index): + # gbis rate + # breakdowns = [] + # for ha, data_assets in loader.data.items(): + # asset_list = data_assets["asset_list"].copy() + # breakdown = asset_list["ECO Eligibility"].value_counts().to_dict() + # breakdowns.append(breakdown) + # breakdowns = pd.DataFrame(breakdowns) + # + # installer = [] + # for ha, data_assets in loader.data.items(): + # survey_list = data_assets["survey_list"] + # if survey_list.empty: + # continue + # if "INSTALLER" not in survey_list.columns: + # continue + # + # installers = survey_list["INSTALLER"].value_counts().to_dict() + # installers["ha_name"] = ha + # installer.append(installers) + # installer = pd.DataFrame(installer) + # installer.drop(columns=["ha_name"]).sum().sum() + + # Adhoc - for HA16, get the properties that still need a CIGA check + asset_list_ha16 = loader.data["HA16"]["asset_list"].copy() + ha_16_need_ciga = asset_list_ha16[ + asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga") + ] + completed_cigas = loader.data["HA16"]["ciga_list"].copy() + # Store the results + ha_16_need_ciga.to_csv("ha16_need_ciga.csv") + completed_cigas.to_csv("ha16_completed_cigas.csv") + + # Adhoc - look at the current pipeline and identify how many dormant, CIGA dependent properties there are for + # live projects + + # Read excel + orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx" + orderbook_workbook = openpyxl.load_workbook(orderbook_filepath) + orderbook_sheet = orderbook_workbook["Contractual Info"] + orderbook_colnames = [cell.value for cell in orderbook_sheet[1]] + + rows = [] + for row in orderbook_sheet.iter_rows(min_row=2, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + rows.append(row_data) + + orderbook = pd.DataFrame(rows, columns=orderbook_colnames) + live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy() + live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "") + + dormant_properties = [] + missed_has = [] + for _, customer in live_orderbook.iterrows(): + if customer['Redacted HA'] not in loader.data.keys(): + missed_has.append(customer['Redacted HA']) continue + asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy() + survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy() + # Remove sold + if not survey_list.empty: + survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])] + asset_list = asset_list.merge( + survey_list[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + # Anything that has an installation has gone to installation, and therefore is not remaining + asset_list = asset_list[pd.isnull(asset_list["installation_status"])] + asset_list = asset_list.drop(columns=["installation_status"]) - agg = pd.DataFrame(agg).reset_index() - - passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"] - passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0 - - failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"] - failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0 - - ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1 - - dormant_ciga = agg[ - agg["ECO Eligibility"].str.contains("subject to ciga") & - ~agg["ECO Eligibility"].str.contains("subject to archetype") + # We pull out the properties that need a CIGA check + need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"] + need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"] + need_ciga_and_archetype = asset_list[ + asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)" ] - dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0 - - dormant_ciga_archetype = agg[ - agg["ECO Eligibility"].str.contains("subject to ciga") & - agg["ECO Eligibility"].str.contains("subject to archetype") - ] - - dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0 - - needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion - needing_check = np.round(needing_check) - - additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + ( - dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate - ) - additional_jobs = np.round(additional_jobs) - - # We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs - original_estimate = loader.december_figures[ - loader.december_figures["HA Name"] == k - ] - - original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0 - base_eco_figures = agg[ - agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"]) - ]["count"].sum() - eco4_from_ciga = original_estimate - base_eco_figures - eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0 - surplus_from_dormant = additional_jobs - eco4_from_ciga - surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant - - res.append( + dormant_properties.append( { - "ha_name": k, - "additional_eco4": additional_jobs, - "needing_check": needing_check, - "surplus_from_dormant": surplus_from_dormant + "HA Name": customer['Redacted HA'], + "Need CIGA": need_ciga.shape[0], + "Need Archetype": need_archetype.shape[0], + "Need CIGA and Archetype": need_ciga_and_archetype.shape[0] } ) - res = pd.DataFrame(res) - # Drop the HAs that are not in that pervious draft - # In the v2 draft, there are 12 HAs + dormant_properties = pd.DataFrame(dormant_properties) + totals = dormant_properties.sum() + totals["HA Name"] = "Total" - v5_surplus = res[ - ~res["ha_name"].isin(["HA9"]) - ]["additional_eco4"].sum() - # 7212 properties - # This is not a perfect difference though, because of the variations in how the numbers are recorded in the November - # all HAs sheet. E.g for HA 107, there were 1239 properties identified. In the postcode list, there are 1255, - # however 531 are still needing a CIGA check. Therefore their original figures, in this case, included properties - # pre-CIGA + dormant_properties = pd.concat([dormant_properties, totals.to_frame().T]) + dormant_properties.to_csv("dormant_properties.csv") - v5_surplus_from_dormant = res[ - ~res["ha_name"].isin(["HA9"]) - ]["surplus_from_dormant"].sum() - # 5539.0 - # 9471690 + loader.december_figures["ECO4 remaining"].sum() + december_figures = loader.december_figures.copy() + december_figures["ECO4 remaining"] = np.where( + december_figures["ECO4 remaining"] < 0, + 0, + december_figures["ECO4 remaining"] + ) + december_figures["ECO4 remaining"].sum()