From 64d42aba67fc601317aa22a971b6d6465a244246 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 23 Dec 2023 15:25:47 +0000 Subject: [PATCH] ha7 --- etl/eligibility/ha_15_32/ha7_app.py | 132 ++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha7_app.py b/etl/eligibility/ha_15_32/ha7_app.py index 139943a1..7d856366 100644 --- a/etl/eligibility/ha_15_32/ha7_app.py +++ b/etl/eligibility/ha_15_32/ha7_app.py @@ -137,6 +137,131 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at): } ) + scoring_df = pd.DataFrame(scoring_data) + # Implement the same process that is being used in the recommendation engine to cleaning scoring_df + + # Perform the same cleaning as in the model - first clean number of room variables though + scoring_df = DataProcessor.apply_averages_cleaning( + data_to_clean=scoring_df, + cleaning_data=cleaning_data, + cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'], + colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"], + ) + + scoring_df = DataProcessor.apply_averages_cleaning( + data_to_clean=scoring_df, + cleaning_data=cleaning_data, + cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"], + ).drop(columns=["LOCAL_AUTHORITY"]) + + scoring_df = DataProcessor.clean_missings_after_description_process( + scoring_df, + ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or ( + "insulation_thickness" in c) or ("ENERGY_EFF" in c)] + ) + + scoring_df = DataProcessor.clean_efficiency_variables(scoring_df) + + model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at) + all_predictions = model_api.predict_all( + df=scoring_df, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + "heat_demand_predictions": "retrofit-heat-predictions-dev", + "carbon_change_predictions": "retrofit-carbon-predictions-dev" + } + ) + + predictions = all_predictions["sap_change_predictions"].copy() + + results_df = pd.DataFrame(results) + + predictions = predictions.rename(columns={"property_id": "row_id"}).merge( + results_df[["row_id", "sap"]], how="left", on="row_id" + ) + predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"] + predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index() + + results_df = results_df.merge( + predictions[["sap_uplift", "row_id"]], + how="left", + on="row_id" + ) + + results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"] + + eligibility_assessment = [] + for _, row in results_df[results_df["eco4_eligible"] == True].iterrows(): + # The upgrade requirements are dependent on the current SAP + + # If the property is an F or G, it only needs to upgrade to an % + if row["sap"] <= 38: + if row["post_install_sap"] >= 57: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 55: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 53: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + else: + + if row["post_install_sap"] >= 71: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 69: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 67: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + + eligibility_assessment.append( + { + "row_id": row["row_id"], + "eligibility_classification": eligibility_classification + } + ) + + eligibility_assessment = pd.DataFrame(eligibility_assessment) + + results_df = results_df.merge( + eligibility_assessment, how="left", on="row_id" + ) + + return results_df, scoring_data, nodata + + +def analyse_ha_7(results_df, data): + df = results_df.merge( + data[["row_id", "row_code", "Property Type"]], how="left", on="row_id" + ) + warmfront_identification = df["row_code"].value_counts() + warmfront_identified = df[df["row_code"] == "potential ECO4"] + + property_types = df["Property Type"].value_counts() + + n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum() + + eco_identified = results_df[results_df["eco4_eligible"]] + n_eco4 = eco_identified["eco4_eligible"].sum() + gbis_identified = results_df[~results_df["eco4_eligible"] & results_df["gbis_eligible"]] + n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum() + + eco_eligibile = results_df[results_df["eco4_eligible"]] + eco_eligibile["eligibility_classification"].value_counts() + + future_possibilities_eco = results_df[ + (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) + ].copy() + + future_possibilities_gbis = results_df[ + (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & ( + ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) + ].copy() + + total_future_possibilities = future_possibilities_eco.shape[0] + future_possibilities_gbis.shape[0] + def app(): data = load_data() @@ -153,3 +278,10 @@ def app(): ) created_at = datetime.now().isoformat() + + results_df, scoring_data, nodata = get_ha7_data(data, cleaned, cleaning_data, created_at) + + # Pickle results + # import pickle + # with open("ha7_results.pkl", "wb") as f: + # pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)