From 88c245750d87b681afe4757ba820ae655f4a8b72 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 27 Dec 2023 12:17:38 +0000 Subject: [PATCH] Added analyse_results for ha16 --- etl/eligibility/ha_15_32/ha16_app.py | 152 ++++++++++++++++++++++++++- etl/eligibility/ha_15_32/ha24_app.py | 18 ++++ 2 files changed, 168 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index a1d25c53..678bf76f 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -247,7 +247,7 @@ def load_data(): ) data["warmfront_identified"] = data["warmfront_identified"].fillna(False) - return data + return data, survey_list def get_epc_data(data, cleaned, cleaning_data, created_at): @@ -332,9 +332,144 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): } ) + scoring_df = pd.DataFrame(scoring_data) + + # Perform the same cleaning as in the model - first clean number of room variables though + scoring_df = DataProcessor.apply_averages_cleaning( + data_to_clean=scoring_df, + cleaning_data=cleaning_data, + cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'], + colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"], + ) + + scoring_df = DataProcessor.apply_averages_cleaning( + data_to_clean=scoring_df, + cleaning_data=cleaning_data, + cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"], + ).drop(columns=["LOCAL_AUTHORITY"]) + + scoring_df = DataProcessor.clean_missings_after_description_process( + scoring_df, + ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or ( + "insulation_thickness" in c) or ("ENERGY_EFF" in c)] + ) + + scoring_df = DataProcessor.clean_efficiency_variables(scoring_df) + scoring_df["UPRN"] = scoring_df["UPRN"].astype(int) + + model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at) + all_predictions = model_api.predict_all( + df=scoring_df, + 
bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + "heat_demand_predictions": "retrofit-heat-predictions-dev", + "carbon_change_predictions": "retrofit-carbon-predictions-dev" + } + ) + + predictions = all_predictions["sap_change_predictions"].copy() + + results_df = pd.DataFrame(results) + + predictions = predictions.rename(columns={"property_id": "row_id"}).merge( + results_df[["row_id", "sap"]], how="left", on="row_id" + ) + predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"] + predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index() + + results_df = results_df.merge( + predictions[["sap_uplift", "row_id"]], + how="left", + on="row_id" + ) + results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"] + + eligibility_assessment = [] + for _, row in results_df[results_df["eco4_eligible"] == True].iterrows(): + # The upgrade requirements are dependent on the current SAP + + # If the property is an F or G, it only needs to upgrade to a D + if row["sap"] <= 38: + if row["post_install_sap"] >= 57: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 55: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 53: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + else: + + if row["post_install_sap"] >= 71: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 69: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 67: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + + eligibility_assessment.append( + { + "row_id": row["row_id"], + "eligibility_classification": eligibility_classification + } + ) + + eligibility_assessment = pd.DataFrame(eligibility_assessment) + + results_df = results_df.merge( + eligibility_assessment, 
how="left", on="row_id" + ) + return results_df, scoring_data, nodata + + +def analyse_results(results_df, data, survey_list): + analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge( + results_df, how="left", on="row_id" + ).merge( + survey_list[["survey_key", survey_list.columns[0]]].rename(columns={survey_list.columns[0]: "funding_scheme"}), + how="left", on="survey_key" + ) + + warmfront_identified = analysis_data[analysis_data["warmfront_identified"]] + + # Of the ECO jobs, what proportion do we get right + warmfront_identified_eco = warmfront_identified[ + warmfront_identified["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]) + ] + + eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0] + + warmfront_identified_gbis = warmfront_identified[ + warmfront_identified["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]) + ] + + gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0] + + # Additional identified + additional_identified_eco = analysis_data[ + (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) + ].shape[0] + additional_identified_gbis = analysis_data[ + (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) & ( + analysis_data["warmfront_identified"] == False + ) + ].shape[0] + # Future + additional_identified_eco_future = analysis_data[ + (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False) + ].shape[0] + additional_identified_gbis_future = analysis_data[ + (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & ( + analysis_data["warmfront_identified"] == False + ) + ].shape[0] + def app(): - data = load_data() + data, survey_list = load_data() data["row_id"] = ["ha16_" + str(i) for i in range(0, len(data))] @@ -349,3 +484,16 @@ def app(): ) created_at = 
datetime.now().isoformat() + + results_df, scoring_data, nodata = get_epc_data(data, cleaned, cleaning_data, created_at) + + # Store + # import pickle + # with open("ha16.pickle", "wb") as f: + # pickle.dump( + # { + # "scoring_data": scoring_data, + # "results": results_df, + # "nodata": nodata + # }, f + # ) diff --git a/etl/eligibility/ha_15_32/ha24_app.py b/etl/eligibility/ha_15_32/ha24_app.py index ab639003..b8d114b6 100644 --- a/etl/eligibility/ha_15_32/ha24_app.py +++ b/etl/eligibility/ha_15_32/ha24_app.py @@ -53,3 +53,21 @@ def load_data(): asset_list["row_colour_name"] == "red", "does not meet criteria", np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future") ) + + # Read in surveys + survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx') + survey_sheet = survey_workbook.active + + survey_rows = [] + survey_colors = [] + + for row in survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers + row_data = [cell.value for cell in row] # This will get you the cell values + row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None + # row_color = COLOR_INDEX[row_color] + survey_rows.append(row_data) + survey_colors.append(row_color) + + survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]]) + # Drop all None rows + survey_list = survey_list.dropna(how='all')