From a40c1670cbf971a5dac633b7c0574df0268f0789 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Dec 2023 14:08:40 +0000 Subject: [PATCH] completed ha25 analysis --- etl/eligibility/ha_15_32/ha25_app.py | 53 ++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py index 07470b51..4d86a546 100644 --- a/etl/eligibility/ha_15_32/ha25_app.py +++ b/etl/eligibility/ha_15_32/ha25_app.py @@ -450,6 +450,46 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): return results_df, scoring_data, nodata +def analyse_results(results_df, data, eco4_prospects_survey_list): + analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge( + results_df, how="left", on="row_id" + ) + + warmfront_identified = analysis_data[analysis_data["warmfront_identified"]] + + # Of the ECO jobs, what proportion do we get right + + success_rate = (warmfront_identified["eco4_eligible"] | warmfront_identified["gbis_eligible"]).sum() / \ + warmfront_identified.shape[ + 0] + + # No gbis for this + # gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0] + + # Additional identified + additional_identified_eco = analysis_data[ + (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) + ] + + additional_identified_eco["eligibility_classification"].value_counts() + + additional_identified_gbis = analysis_data[ + (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) & ( + analysis_data["warmfront_identified"] == False + ) + ].shape[0] + + # Future + additional_identified_eco_future = analysis_data[ + (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False) + ].shape[0] + additional_identified_gbis_future = analysis_data[ + (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) 
& ( + analysis_data["warmfront_identified"] == False + ) + ].shape[0] + + def app(): data, eco4_prospects_survey_list = load_data() @@ -466,3 +506,16 @@ def app(): ) created_at = datetime.now().isoformat() + + results_df, scoring_data, nodata = get_epc_data(data, cleaned, cleaning_data, created_at) + # Pickle the outputs + # import pickle + # with open("ha25.pickle", "wb") as f: + # pickle.dump( + # { + # "results_df": results_df, + # "scoring_data": scoring_data, + # "nodata": nodata + # }, + # f + # )