From 707724cdb14d228c4d93c0cf9ecd98937610e621 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 14 Dec 2023 10:16:57 +0000 Subject: [PATCH] Adding more ha analysis --- etl/eligibility/ha_15_32/app.py | 95 +++++++++++++++++++++++++++++---- 1 file changed, 85 insertions(+), 10 deletions(-) diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index 9d752d2d..48dd5daf 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ -472,6 +472,8 @@ def get_ha_32data(ha_data, cleaned, cleaning_data, created_at): "message": "No EPC found", "gbis_eligible_future": None, "gbis_eligible_future_message": None, + "eco4_eligible_future": None, + "eco4_eligible_future_message": None, "tenure": None, "heating_description": None, } @@ -529,6 +531,8 @@ def get_ha_32data(ha_data, cleaned, cleaning_data, created_at): "message": "eco4 conditional on post sap", "gbis_eligible_future": eligibility.gbis["eligible"], "gbis_eligible_future_message": eligibility.gbis["message"], + "eco4_eligible_future": eligibility.eco4["eligible"], + "eco4_eligible_future_message": eligibility.eco4["message"], "tenure": eligibility.tenure, "heating_description": eligibility.epc["mainheat-description"], } @@ -549,6 +553,8 @@ def get_ha_32data(ha_data, cleaned, cleaning_data, created_at): "message": None, "gbis_eligible_future": eligibility.gbis["eligible"], "gbis_eligible_future_message": eligibility.gbis["message"], + "eco4_eligible_future": eligibility.eco4["eligible"], + "eco4_eligible_future_message": eligibility.eco4["message"], "tenure": eligibility.tenure, "heating_description": eligibility.epc["mainheat-description"], } @@ -613,6 +619,10 @@ def get_ha_15data(ha_data, cleaned, cleaning_data, created_at): "walls": None, "date_epc": None, "message": "No EPC found", + "eco4_eligible_future": None, + "eco4_eligible_future_message": None, + "tenure": None, + "heating_description": None, } ) continue @@ -641,6 +651,11 @@ def get_ha_15data(ha_data, cleaned, cleaning_data, created_at): eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() + # If the house is not identified, we do a full gbis and eco4 check + # TODO: Add in ECO4 check + eligibility.check_gbis() + eligibility.check_eco4() + if eligibility.eco4_warmfront["eligible"]: scoring_dictionary = prepare_model_data_row( property_id=house["row_id"], @@ -661,6 +676,12 @@ def get_ha_15data(ha_data, cleaned, cleaning_data, created_at): "walls": eligibility.walls["clean_description"], "date_epc": eligibility.epc["lodgement-date"], "message": "eco4 conditional on post sap", + "gbis_eligible_future": eligibility.gbis["eligible"], + "gbis_eligible_future_message": eligibility.gbis["message"], + "eco4_eligible_future": eligibility.eco4["eligible"], + "eco4_eligible_future_message": eligibility.eco4["message"], + "tenure": eligibility.tenure, + "heating_description": eligibility.epc["mainheat-description"], } ) continue @@ -676,7 +697,13 @@ def get_ha_15data(ha_data, cleaned, cleaning_data, created_at): "roof": eligibility.roof["clean_description"], "walls": eligibility.walls["clean_description"], "date_epc": eligibility.epc["lodgement-date"], - "message": None + "message": None, + "gbis_eligible_future": eligibility.gbis["eligible"], + "gbis_eligible_future_message": eligibility.gbis["message"], + "eco4_eligible_future": eligibility.eco4["eligible"], + "eco4_eligible_future_message": eligibility.eco4["message"], + "tenure": eligibility.tenure, + "heating_description": eligibility.epc["mainheat-description"], } ) @@ -794,6 +821,7 @@ def analyse_ha_32_results(results, ha32, no_house_numbers): """ results_df = pd.DataFrame(results) + results_df["tenure"] = results_df["tenure"].fillna("Unknown - probably new build") # What proportio warmfront_identified = results_df[ @@ -857,21 +885,32 @@ def analyse_ha_32_results(results, ha32, no_house_numbers): new_possibilities = results_df[ (~results_df["warmfront_identified"]) & - (results_df["gbis_eligible"] | results_df["eco4_eligible"]) + (results_df["gbis_eligible"] | results_df["eco4_eligible"]) & + (results_df["tenure"] == "Rented (social)") ].copy() - new_possibilities_full_gbis = results_df[ + future_possibilities_eco = results_df[ (~results_df["warmfront_identified"]) & - (results_df["gbis_eligible_future"] == True) + (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) ].copy() + future_possibilities_eco["eco4_eligible_future_message"].value_counts() + + future_possibilities_gbis = results_df[ + (~results_df["warmfront_identified"]) & + (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & ( + ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) + ].copy() + + future_possibilities_gbis["gbis_eligible_future_message"].value_counts() + # We deem that Any EPC that is produced in the last 3 years gives us good confidence cutoff_date = datetime.now() - timedelta(days=3 * 365) new_possibilities["high_confidence"] = pd.to_datetime(new_possibilities["date_epc"]) >= cutoff_date - new_possibilities_full_gbis["high_confidence"] = pd.to_datetime( - new_possibilities_full_gbis["date_epc"]) >= cutoff_date + future_possibilities_eco["high_confidence"] = pd.to_datetime( + future_possibilities_eco["date_epc"]) >= cutoff_date # We do a quick check on properties that didn't have a house number: no_house_numbers_ha32 = ha32[ha32["row_id"].isin(no_house_numbers)]["identified"].sum() @@ -881,8 +920,10 @@ def analyse_ha_32_results(results, ha32, no_house_numbers): new = { "n_new_possibilities": new_possibilities.shape[0], "new_possibilities_confidence": new_possibilities["high_confidence"].value_counts(), - "new_possibilities_full_gbis": new_possibilities_full_gbis.shape[0], - "new_possibilities_full_gbis_confidence": new_possibilities_full_gbis["high_confidence"].value_counts() + "future_possibilities_gbis": future_possibilities_gbis.shape[0], + "future_possibilities_gbis_confidence": future_possibilities_gbis["high_confidence"].value_counts(), + "future_possibilities_eco": future_possibilities_eco.shape[0], + "future_possibilities_eco_confidence": future_possibilities_eco["high_confidence"].value_counts(), } return success_rate, new @@ -898,6 +939,8 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers): For HA32, most of these (if not all) properties were identified under gbis """ + results_df["tenure"] = results_df["tenure"].fillna("Unknown - probably new build") + # What proportio warmfront_identified = results_df[ results_df["warmfront_identified"] @@ -930,6 +973,7 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers): ] sap_low_enough["walls"].value_counts() + z = ha15[ha15["row_id"].isin(sap_too_high["row_id"].values)] investigate_1 = ha15[ha15["row_id"].isin(sap_too_high["row_id"])][ ["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]] @@ -943,7 +987,29 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers): new_possibilities = results_df[ (~results_df["warmfront_identified"]) & - ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) + ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) & + (results_df["tenure"] == "Rented (social)") + ].copy() + + # These are future possibilityies + new_possibilities_eco = results_df[ + (~results_df["warmfront_identified"]) & + (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) + ].copy() + + rids = new_possibilities_eco[new_possibilities_eco["sap"] == 54]["row_id"] + z = ha15[ha15["row_id"].isin(rids)] + + new_possibilities_gbis = results_df[ + (~results_df["warmfront_identified"]) & + (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & ( + ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) + ].copy() + + not_new = results_df[ + (~results_df["warmfront_identified"]) & + (results_df["gbis_eligible_future"] != True) & (results_df["eco4_eligible_future"] != True) & ( + ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) ].copy() # We deem that Any EPC that is produced in the last 3 years gives us good confidence for GBIS @@ -1026,6 +1092,13 @@ def app(): # f # ) + # with open("ha32.pickle", "rb") as f: + # ha32_dict = pickle.load(f) + + # ha32_results = ha32_dict["ha32_results"] + # ha32_scoring_data = ha32_dict["ha32_scoring_data"] + # ha32_no_house_numbers = ha32_dict["ha32_no_house_numbers"] + ha32_success_rate, ha32_new_possibilities = analyse_ha_32_results( results=ha32_results, ha32=ha32, no_house_numbers=ha32_no_house_numbers ) @@ -1044,5 +1117,7 @@ def app(): # ) ha15_success_rate, ha15_new, ha15_identified_results, ha15_missed_results = analyse_ha_15_results( - ha15_results_df, ha15, ha15_no_house_numbers + results_df=ha15_results_df, + ha15=ha15, + no_house_numbers=ha15_no_house_numbers )