From 6ae21bbcb023139961eb69749ac1380a7d3ac001 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 28 Feb 2024 12:31:48 +0000 Subject: [PATCH] Creating the output structure --- etl/eligibility/Eligibility.py | 11 +- .../ha_15_32/ha_analysis_batch_3.py | 548 +++++++----------- 2 files changed, 220 insertions(+), 339 deletions(-) diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index f7a5ed98..b594579f 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -365,7 +365,7 @@ class Eligibility: return # Near perfect - if self.cavity["suitability"] and (current_sap < 55): + if self.cavity["suitability"] and (current_sap < 69): self.gbis_warmfront = { "eligible": True, "strict": True, @@ -373,15 +373,6 @@ class Eligibility: } return - # Suitable cavity, but high sap - if self.cavity["suitability"] and (current_sap >= 55): - self.gbis_warmfront = { - "eligible": True, - "strict": False, - "message": "Meets cavity, fails SAP check", - } - return - self.gbis_warmfront = { "eligible": False, "strict": False, diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 5cbfb90c..61c4a243 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1646,10 +1646,26 @@ def get_epc_data( def get_col_widths(dataframe): - # First we find the maximum length of the index column - idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))]) - # Then, we concatenate this to the max of the lengths of column name and its max value for each column, row-wise - return [idx_max] + [max(dataframe[col].astype(str).map(len).max(), len(col)) for col in dataframe.columns] + # Define a maximum width for any column to prevent excessively wide columns + max_allowed_width = 25 + + # Calculate widths for columns + widths = [] + + if isinstance(dataframe.columns, pd.MultiIndex): + # For 
MultiIndex, calculate max width considering the header and data + header_widths = [max(len(str(item)) for item in col) + 2 for col in dataframe.columns.values] # +2 for padding + for i, column in enumerate(dataframe.columns): + max_data_width = max(dataframe[column].astype(str).apply(len).max(), header_widths[i]) + widths.append(min(max_data_width, max_allowed_width)) + else: + # For non-MultiIndex, calculate width normally + for col in dataframe.columns: + # Calculate the max length of data or column name and limit it + max_length = max(dataframe[col].astype(str).apply(len).max(), len(str(col)) + 2) # +2 for padding + widths.append(min(max_length, max_allowed_width)) + + return widths def analyse_ha_data(outputs, loader): @@ -1671,42 +1687,13 @@ def analyse_ha_data(outputs, loader): :return: """ - eco4_rate = 1710 - gbis_rate = 600 - ha_analysis_results = [] - ha_revenue_results = [] for ha_name, datasets in outputs.items(): - inputs = [x for k, x in loader.data.items() if k == ha_name][0] - # TODO: This is placeholder because we don't have the schemes that the properties have been qualified for - # yet - # - import random - randomly_allocated_schemes = random.choices(["ECO4", "GBIS"], k=inputs["asset_list"].shape[0]) - inputs["asset_list"]["randomly_allocated_schemes"] = randomly_allocated_schemes - inputs["asset_list"]["funding_scheme"] = None - inputs["asset_list"]["funding_scheme"] = np.where( - inputs["asset_list"]["row_meaning"] == "identified potential eco works (CWI)", - inputs["asset_list"]["randomly_allocated_schemes"], - inputs["asset_list"]["funding_scheme"] - ) - - # TODO: Also temp, just for HA 6 - if ha_name == "ha_6": - inputs["survey_list"]["funding_scheme"] = None - inputs["survey_list"]["funding_scheme"] = np.where( - inputs["survey_list"][ - 'AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION '] == "AFFORDABLE WARMTH", - "ECO4", - "GBIS" - ) - - # End placholder results_df = datasets["results_df"].copy() - analysis_data = 
inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename( + analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename( columns={"row_meaning": "asset_identification_status"} ).merge( results_df, @@ -1715,293 +1702,236 @@ def analyse_ha_data(outputs, loader): left_on="asset_list_row_id" ) - # We now merge the survey list onto the analysis data and remove anything that is sold, to give us just what is - # remaining + ################################################################################################ + # We take the properties that strictly qualified under eco + ################################################################################################ - if inputs["matched_lookup"] is not None: - analysis_data = analysis_data.merge( - inputs["matched_lookup"], how="left", on="asset_list_row_id" + eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy() + eco4_identified["identification_type"] = None + eco4_identified["identification_type"] = np.where( + (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True), + "strict", + eco4_identified["identification_type"] + ) + + eco4_identified["identification_type"] = np.where( + (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False), + "expansive", + eco4_identified["identification_type"] + ) + ################################################################################################ + # We take the properties dependent on CIGA + ################################################################################################ + + ciga_dependent_identified = analysis_data[ + analysis_data["ECO Eligibility"].isin( + [ + "eco4 (subject to ciga)", + "eco4 - passed ciga" + ] ) - # Drop any rows that have a survey_list_row_id - analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])] + ].copy() - # If we have a survey list, we merge this onto the 
results - n_properties_in_asset_list = analysis_data["asset_list_row_id"].nunique() - - properties_sold = ( - inputs["survey_list"].groupby("funding_scheme")["survey_list_row_id"].nunique().reset_index() if - inputs["survey_list"] is not None else pd.DataFrame(columns=["funding_scheme"]) - ) - properties_sold_eco4 = ( - properties_sold[properties_sold["funding_scheme"] == "ECO4"]["survey_list_row_id"].values[0] if - (not properties_sold.empty) and ("ECO4" in properties_sold["funding_scheme"].values) else 0 - ) - properties_sold_gbis = ( - properties_sold[properties_sold["funding_scheme"] == "GBIS"]["survey_list_row_id"].values[0] if - (not properties_sold.empty) and ("GBIS" in properties_sold["funding_scheme"].values) else 0 + # These are properties that show filled cavity + ciga_dependent_identified["identification_type"] = None + ciga_dependent_identified["identification_type"] = np.where( + ciga_dependent_identified["eco4_message"].isin( + [ + "Perfect suitability", + "Meets cavity and sap", + "Fails cavity, meets loft, fails SAP", + "Meets fabric, fails SAP check", + "Meets cavity, loft borderline, meets sap", + ] + ), + "strict", + ciga_dependent_identified["identification_type"] ) - # We now calculate the number of remaining properties, by scheme - remaining_properties = analysis_data[ - analysis_data["asset_identification_status"] == "identified potential eco works (CWI)" - ].copy() - remaining_properties["prospect_type"] = None - - remaining_properties_by_scheme = ( - remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index() + ciga_dependent_identified["identification_type"] = np.where( + (ciga_dependent_identified["eco4_message"].isin(["All conditions fail", "failed fabric check"])) & + (ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])), + "expansive", + ciga_dependent_identified["identification_type"] ) - n_remaining_properties_eco4 = remaining_properties_by_scheme[ - 
remaining_properties_by_scheme["funding_scheme"] == "ECO4" - ]["asset_list_row_id"].values[0] + ciga_dependent_identified["identification_type"] = np.where( + (ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | ( + ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"]) + ), + "expansive", + ciga_dependent_identified["identification_type"] + ) - n_remaining_properties_gbis = remaining_properties_by_scheme[ - remaining_properties_by_scheme["funding_scheme"] == "GBIS" - ]["asset_list_row_id"].values[0] + ################################################################################################ + # We take the properties that qualified for gbis + ################################################################################################ + gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy() + gbis_identified["identification_type"] = None + gbis_identified["identification_type"] = np.where( + (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69), + "strict", + gbis_identified["identification_type"] + ) - # For the remaining properties, we use the results of the eligibility process to classify the property into - # one of multiple categories - # - # For properties that have been identified as ECO4 - # 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because - # Warmfront regularly re-surveys properties which then fall within the SAP requirement - # - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties - # here and re-surveying is a common practicce by Warmfront. Additionally, many of the social homes have - # very old EPCs which may score lower when re-done - # 2) Meets Fabric requirements, not SAP - # Warmfront has identified the property as eligible, but the EPC is not D or below. 
We consider this but - # label is separately as not a strict - # 3) Subject to CIGA check - Meets loft conditions but shows a filled cavity. - # - we don't have a SAP constraint here because the EPC is (currently) showing what the property might - # actually look like after retrofit and so the EPC currently being a C or above means little, because - # the updated EPC, showing an empty cavity, could bring the property within - # 4) Loft insulation too thick - Meets empty cavity but shows a loft with between 101 and 270mm insulation. - # - No SAP constraint, for the same reason as in category 2) - # 5) Looks like GBIS instead - # 6) Does not look like ECO4 candidate - # - # For properties that have been identified as GBIS - # 1) Strict GBIS candidates - # 2) Properties that actually look like strict GBIS candidates - # 3) Subject to CIGA check - Filled cavity - # 4) Does not look like a GBIS candidate + gbis_identified["identification_type"] = np.where( + (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] >= 69), + "expansive", + gbis_identified["identification_type"] + ) - remaining_eco4_df = remaining_properties[ - remaining_properties["funding_scheme"] == "ECO4" - ].copy() + # Finally, we look at the properties that have not been identified by Warmfront + not_identified = analysis_data[ + analysis_data["ECO Eligibility"].isin( + [ + "not eligible" + ] + ) + ].copy() - #################################### + surplus_eco4 = not_identified[ + (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin( + ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"] + )) + ] + + surplus_gbis = not_identified[ + (not_identified["gbis_eligible"] == True) & ( + ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values) + ) & (not_identified["sap"] < 69) & ( + (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | ( + 
not_identified["walls"].str.contains("partial", case=False, na=False) + ) + ) + ] + surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False] + + # Output variables # ECO4 - #################################### - - # 1) We identify this if: - # - remaining_properties["eco4_eligible"] == True - - remaining_eco4_df["prospect_type"] = np.where( - (remaining_eco4_df["eco4_eligible"] == True), - "strict ECO4", - remaining_eco4_df["prospect_type"] + n_properties_in_asset_list = inputs["asset_list"].shape[0] + n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0] + eco4_of_which_identified_strict = ( + eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] + + ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0] ) - - # 2) Meets fabric requirements - remaining_eco4_df["prospect_type"] = np.where( - ( - (remaining_eco4_df["eco4_message"] == "sap too high") & - remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) & - remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) & - pd.isnull(remaining_eco4_df["prospect_type"]) - ), - "ECO4 if SAP downgrade", - remaining_eco4_df["prospect_type"] + eco4_of_which_identified_expansive = ( + eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] + + ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0] ) - - # 3) We identify this if it has a filled cavity but meets the loft conditions - # TODO: Consider if we should also allow 100-270mm or if we should add some slight tolerance (e.g. 
150mm) - # to account for measurement error - remaining_eco4_df["prospect_type"] = np.where( - ( - remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) & - remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) - ), - "ECO4 - Filled cavity - subject to CIGA check", - remaining_eco4_df["prospect_type"] - ) - - # 4) We identify this by ensuring the cavity if empty or partial, and the loft has between 101 and 270mm - remaining_eco4_df["prospect_type"] = np.where( - ( - remaining_eco4_df["eligibility_cavity_type"].isin(["empty", "partial"]) & - remaining_eco4_df["eligibility_loft_type"].isin(["100-270mm"]) - ), - "ECO4 prospect - empty cavity, loft insulation below regulation", - remaining_eco4_df["prospect_type"] - ) - - # 5) Looks like GBIS instead - remaining_eco4_df["prospect_type"] = np.where( - (remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]), - "Looks like GBIS", - remaining_eco4_df["prospect_type"] - ) - - # 6) This is everything else (i.e. 
both the cavity is full and the loft insulation is above 100mm) - remaining_eco4_df["prospect_type"] = remaining_eco4_df["prospect_type"].fillna( - "Does not look like ECO4 candidate" - ) - - #################################### # GBIS - #################################### - - remaining_gbis = remaining_properties[ - remaining_properties["funding_scheme"] == "GBIS" - ].copy() - - # 1) Strict GBIS candidates - remaining_gbis["prospect_type"] = np.where( - ( - (remaining_gbis["gbis_eligible"] == True) & (remaining_gbis["eco4_eligible"] == False) - ), - "strict GBIS", - remaining_gbis["prospect_type"] - ) - - # 2) GBIS candidates that look like strict ECO4 candidates - remaining_gbis["prospect_type"] = np.where( - (remaining_gbis["eco4_eligible"] == True), - "GBIS - Upgradable to ECO4", - remaining_gbis["prospect_type"] - ) - - # 3) Subject to CIGA check - Filled cavity - remaining_gbis["prospect_type"] = np.where( - ( - remaining_gbis["eligibility_cavity_type"].isin(["full"]) & - pd.isnull(remaining_gbis["prospect_type"]) - ), - "GBIS - Filled cavity - subject to CIGA check", - remaining_gbis["prospect_type"] - ) - - # 4) Everything else - remaining_gbis["prospect_type"] = remaining_gbis["prospect_type"].fillna( - "Does not look like GBIS candidate" - ) - - #################################### - # Surplus properties - #################################### - - # Take properties that were not identified by Warmfront and identify those that look like they would qualify - # under the strictest criteria - surplus_df = analysis_data[ - analysis_data["asset_identification_status"] != "identified potential eco works (CWI)" - ].copy() - - eco4_surplus = surplus_df[ - ( - (surplus_df["eco4_eligible"] == True) & (surplus_df["eco4_message"] == "subject to post retrofit sap") & - ( - surplus_df["eligibility_classification"].isin( - ["high confidence", "highest confidence", "medium confidence"] - ) - ) - ) - ].copy() - - gbis_surplus = surplus_df[ - ( - 
(surplus_df["gbis_eligible"] == True) & (surplus_df["eco4_eligible"] == False) & ( - surplus_df["eligibility_cavity_type"].isin(["empty", "partial"]) - ) - ) - ].copy() - - # Perform some checks to make sure we have all of the values - remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict() - if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]): - raise ValueError( - "Number of remaining properties does not match the number of properties in remaining ECO4 dict" - ) - - remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict() - if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]): - raise ValueError( - "Number of remaining properties does not match the number of properties in remaining GBIS dict" - ) + n_warmfront_identified_gbis = gbis_identified.shape[0] + gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0] + gbis_of_which_identified_expansive = \ + gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0] to_append = { - "ha_name": ha_name, - "n_properties_in_asset_list": n_properties_in_asset_list, + ("", "HA Name"): ha_name, + ("", "# Properties in asset list"): n_properties_in_asset_list, ############ # ECO4 ############ - "properties_sold_eco4": properties_sold_eco4, - "n_remaining_properties_eco4": n_remaining_properties_eco4, - **remaining_eco4_dict, + ("ECO4", "# Properties identieid by Warmfront"): n_warmfront_identified_eco4, + ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict, + ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive, + ("ECO4", "Of which identified by model - total"): ( + eco4_of_which_identified_strict + eco4_of_which_identified_expansive), + ("ECO4", "Additional properties"): surplus_eco4.shape[0], ############ # GBIS ############ - "properties_sold_gbis": properties_sold_gbis, - 
"n_remaining_properties_gbis": n_remaining_properties_gbis, - **remaining_gbis_dict, - ############ - # GBIS - ############ - "n_eco4_surplus": eco4_surplus.shape[0], - "n_gbis_surplus": gbis_surplus.shape[0], + ("GBIS", "# Properties identieid by Warmfront"): n_warmfront_identified_gbis, + ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict, + ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive, + ("GBIS", "Of which identified by model - total"): ( + gbis_of_which_identified_strict + gbis_of_which_identified_expansive + ), + ("GBIS", "Additional properties"): surplus_gbis.shape[0] } ha_analysis_results.append(to_append) - revenue_to_append = { - "ha_name": ha_name, - "£ Remaining from asset list": ( - n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate - ), - "Of which: Strict": ( - to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate + - to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate - ), - "Of which: Subject to CIGA": ( - to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate + - to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate - ), - "Of which: Prospect, not perfect strict prospect": ( - to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate + - to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate - ), - "Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate, - "Of which: Does not look like prospect": ( - to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate + - to_append.get("Does not look like GBIS candidate", 0) * gbis_rate - ), - "Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate, - "Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate) - } - - # Perform a quick check: - if 
revenue_to_append["£ Remaining from asset list"] - ( - revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] + - revenue_to_append["Of which: Prospect, not perfect strict prospect"] + - revenue_to_append["Of which: Potential downgrade to GBIS"] + - revenue_to_append["Of which: Does not look like prospect"] - ) > 1: - raise ValueError("Error between top level revenue figures and breakdown - investigate me") - - ha_revenue_results.append(revenue_to_append) - ha_analysis_results = pd.DataFrame(ha_analysis_results) - ha_revenue_results = pd.DataFrame(ha_revenue_results) + ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns) + facts_and_figures = loader.facts_and_figures.copy() + facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int) + facts_and_figures = facts_and_figures.sort_values("ha_number") + facts_and_figures = facts_and_figures.drop(columns=["ha_number"]) + + # Rename some of the cols + facts_and_figures = facts_and_figures.rename( + columns={ + # ECO4 cols + "ECO4": "ECO4 - December", + "GBIS": "GBIS - December", + "eco4 (subject to ciga)": "ECO4 - subject to ciga", + "eco4": "ECO4 - doesn't need CIGA", + "eco4 - passed ciga": "ECO4 - passed CIGA", + "failed ciga": "ECO4 - failed CIGA", + "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS", + "ECO4 - in progress": "ECO4 - Install in progress", + "ECO4 - cancelled": "ECO4 - Install cancelled", + # GBIS cols + "gbis": "GBIS total (asset list)" + } + ) + # We calculate the eco4 total from the asset list + # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is + # ECO4 - doesn't need CIGA + ECO4 - passed CIGA + # 2) if ciga checks haven't been completed (i.e. 
ECO4 - passed ciga is missing), this sum is + # ECO4 - doesn't need CIGA + ECO4 - subject to ciga + facts_and_figures["ECO4 total (asset list)"] = np.where( + facts_and_figures["ECO4 - passed CIGA"] > 0, + facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"], + facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - subject to ciga"] + ) + + # Re-arrange the columns + facts_and_figures = facts_and_figures[ + [ + 'HA Name', + 'ECO4 - December', + 'GBIS - December', + 'ECO4 total (asset list)', + 'GBIS total (asset list)', + 'ECO4 - subject to ciga', + "ECO4 - doesn't need CIGA", + 'ECO4 - passed CIGA', + 'ECO4 - failed CIGA', + 'ECO4 - installed', + 'ECO4 - Install in progress', + 'ECO4 - Install cancelled', + 'ECO4 - partially installed', + 'ECO4 - Install downgrade to GBIS', + ] + ] + # Add a note to flag any rows where both of the following hold: + # ECO4 - subject to ciga is greater than 0, and + # ECO4 - passed CIGA is greater than 0 + facts_and_figures["Missed CIGA checks opportunity"] = None + facts_and_figures["Missed CIGA checks opportunity"] = np.where( + (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0), + "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype( + str) + " ECO4 properties needing a CIGA check", + facts_and_figures["Missed CIGA checks opportunity"] + ) + + # Re-arrange the columns + + # Also sort ha_analysis_results by ha number + ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int) + ha_analysis_results = ha_analysis_results.sort_values("ha_number") + ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"]) + + # We save 2 sheets # Automate creation of the excel # Create a Pandas Excel writer using XlsxWriter as the engine - with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer: + with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') 
as writer: # Write each dataframe to a different worksheet without the index - for df, sheet in [(ha_revenue_results, 'Total Revenue'), - (ha_analysis_results, 'By ECO4 and GBIS')]: + for df, sheet in [(facts_and_figures, 'HA Facts and Figures'), + (ha_analysis_results, 'Asset Identification')]: - df.to_excel(writer, sheet_name=sheet, index=False) + df.to_excel(writer, sheet_name=sheet) # Auto-adjust columns' width for i, width in enumerate(get_col_widths(df)): @@ -2134,7 +2064,7 @@ def app(): # Determines if we want to use the cached data in s3 use_cache = True # Determines if we want to perform the data pull - pull_data = True + pull_data = False # List all of the data in the folder directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] @@ -2173,43 +2103,3 @@ def app(): floor_area_decile_thresholds=floor_area_decile_thresholds, pull_data=pull_data ) - - # for ha_name, datasets in outputs.items(): - # datasets["results_df"] = datasets["results_df"].drop( - # columns=["eligibility_cavity_type", "eligibility_loft_type"] - # ) - # - # # Re-do - # res = [] - # for _, row in tqdm(datasets["results_df"].iterrows(), total=datasets["results_df"].shape[0]): - # epc = { - # "walls-description": row["walls"], - # "roof-description": row["roof"], - # "floor-description": "", - # "tenure": "", - # "current-energy-efficiency": row["sap"], - # } - # eligibility = Eligibility(epc=epc, cleaned=cleaned) - # eligibility.check_eco4_warmfront() - # res.append( - # { - # "row_id": row["row_id"], - # "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"], - # "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"] - # } - # ) - # - # # Merge back on - # res = pd.DataFrame(res) - # datasets["results_df"] = datasets["results_df"].merge(res, how="left", on="row_id") - # - # # Re-save in s3 - # save_pickle_to_s3( - # data={ - # "results_df": datasets["results_df"], - # "scoring_df": datasets["scoring_df"], - # "nodata": 
datasets["nodata"] - # }, - # bucket_name="retrofit-datalake-dev", - # s3_file_name=f"ha-analysis/{ha_name}/processed_results.pickle" - # )