diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index ccceb05f..48bfeb2c 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ -833,6 +833,18 @@ def analyse_ha_32_results(results, ha32, no_house_numbers): results_df["warmfront_identified"] ] + # Aggregates of no eco and gbis jobs identified + n_eco = results_df["eco4_eligible"].sum() + # Gbis is rows where eco4 is not eligible + n_gbis = results_df[ + (results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False) + ]["gbis_eligible"].sum() + + pipeline_potential = results_df[ + (results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | ( + results_df["gbis_eligible"] == True) + ] + success_rate = warmfront_identified["gbis_eligible"].sum() / warmfront_identified.shape[0] # For HA32, this is 89% @@ -890,8 +902,16 @@ def analyse_ha_32_results(results, ha32, no_house_numbers): new_possibilities = results_df[ (~results_df["warmfront_identified"]) & - (results_df["gbis_eligible"] | results_df["eco4_eligible"]) & - (results_df["tenure"] == "Rented (social)") + (results_df["gbis_eligible"] | results_df["eco4_eligible"]) + ].copy() + + new_possibilities_eco = results_df[ + (~results_df["warmfront_identified"]) & + (results_df["eco4_eligible"] == True) + ].copy() + new_possibilities_gbis = results_df[ + (~results_df["warmfront_identified"]) & + (results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True) ].copy() future_possibilities_eco = results_df[ @@ -959,6 +979,11 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers): "eligibility_classification"].value_counts() # For HA15 this is 50.3% + pipeline_potential = results_df[ + (results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | ( + results_df["gbis_eligible"] == True) + ] + # of the properties we identify, what is the mix of confidenc missed = results_df[ @@ -977,32 +1002,32 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers): missed["sap"] < 69 ] - sap_low_enough["walls"].value_counts() - z = ha15[ha15["row_id"].isin(sap_too_high["row_id"].values)] - - investigate_1 = ha15[ha15["row_id"].isin(sap_too_high["row_id"])][ - ["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]] - - investigate_2 = ha15[ha15["row_id"].isin(sap_low_enough["row_id"])][ - ["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]] - - missed["message"].value_counts() + # Aggregates of no eco and gbis jobs identified + n_eco = results_df["eco4_eligible"].sum() + # Gbis is rows where eco4 is not eligible + n_gbis = results_df[ + (results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False) + ]["gbis_eligible"].sum() # We now look for properties that we identified, that were not identified by Warmfront new_possibilities = results_df[ (~results_df["warmfront_identified"]) & - ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) & - (results_df["tenure"] == "Rented (social)") + ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) + ].copy() + + new_possibilities_eco = results_df[ + (~results_df["warmfront_identified"]) & + (results_df["eco4_eligible"] == True) ].copy() # These are future possibilityies - new_possibilities_eco = results_df[ + future_possibilities_eco = results_df[ (~results_df["warmfront_identified"]) & (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) ].copy() - new_possibilities_gbis = results_df[ + future_possibilities_gbis = results_df[ (~results_df["warmfront_identified"]) & (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & ( ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) diff --git a/etl/eligibility/ha_15_32/ha33_app.py b/etl/eligibility/ha_15_32/ha33_app.py index 9af5eae2..42c8fa81 100644 --- a/etl/eligibility/ha_15_32/ha33_app.py +++ b/etl/eligibility/ha_15_32/ha33_app.py @@ -264,21 +264,21 @@ def get_ha_33data(data, cleaned, cleaning_data, created_at): def analyse_ha_33(results_df, data): - results_df_social = results_df[results_df["tenure"] == "Rented (social)"] + # results_df_social = results_df[results_df["tenure"] == "Rented (social)"] + # + # results_df_social["tenure"].value_counts() - results_df_social["tenure"].value_counts() + data[data["row_id"].isin(results_df["row_id"].values)]["PROPERTY TYPE"].value_counts() - data[data["row_id"].isin(results_df_social["row_id"].values)]["PROPERTY TYPE"].value_counts() + n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum() + n_eco4 = results_df["eco4_eligible"].sum() + n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum() - n_identified = (results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]).sum() - n_eco4 = results_df_social["eco4_eligible"].sum() - n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum() - - eco_eligibile = results_df_social[results_df_social["eco4_eligible"]] + eco_eligibile = results_df[results_df["eco4_eligible"]] eco_eligibile["walls"].value_counts() eco_eligibile["roof"].value_counts() - results_df_social[results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]]["tenure"].value_counts() + results_df[results_df["gbis_eligible"] | results_df["eco4_eligible"]]["tenure"].value_counts() results_df_social["eligibility_classification"].value_counts() @@ -316,3 +316,11 @@ def app(): created_at = datetime.now().isoformat() results_df, _, _ = get_ha_33data(data, cleaned, cleaning_data, created_at) + + # Read in + import pickle + with open("ha33_results.pickle", "rb") as f: + data = pickle.load(f) + results_df = pd.DataFrame(data["results"]) + scoring_data = data["scoring_data"] + nodata = data["nodata"] diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py index 8a404eec..92b03539 100644 --- a/etl/eligibility/ha_15_32/ha4_app.py +++ b/etl/eligibility/ha_15_32/ha4_app.py @@ -241,15 +241,11 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): def analyse_ha_4(results_df, data): - results_df_social = results_df[results_df["tenure"] == "Rented (social)"] + n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum() + n_eco4 = results_df["eco4_eligible"].sum() + n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum() - results_df_social["property_type"].value_counts() - - n_identified = (results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]).sum() - n_eco4 = results_df_social["eco4_eligible"].sum() - n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum() - - eco_eligibile = results_df_social[results_df_social["eco4_eligible"]] + eco_eligibile = results_df[results_df["eco4_eligible"]] eco_eligibile["eligibility_classification"].value_counts() future_possibilities_eco = results_df[ @@ -299,3 +295,11 @@ def app(): # "scoring_data": scoring_data, # "nodata": nodata # }, f) + + # Read in + # import pickle + # with open("ha_4.pickle", "rb") as f: + # data = pickle.load(f) + # results_df = data["results_df"] + # scoring_data = data["scoring_data"] + # nodata = data["nodata"]