Adding more HA analysis

This commit is contained in:
Khalim Conn-Kowlessar 2023-12-14 10:16:57 +00:00
parent df989ba918
commit 707724cdb1

View file

@ -472,6 +472,8 @@ def get_ha_32data(ha_data, cleaned, cleaning_data, created_at):
"message": "No EPC found",
"gbis_eligible_future": None,
"gbis_eligible_future_message": None,
"eco4_eligible_future": None,
"eco4_eligible_future_message": None,
"tenure": None,
"heating_description": None,
}
@ -529,6 +531,8 @@ def get_ha_32data(ha_data, cleaned, cleaning_data, created_at):
"message": "eco4 conditional on post sap",
"gbis_eligible_future": eligibility.gbis["eligible"],
"gbis_eligible_future_message": eligibility.gbis["message"],
"eco4_eligible_future": eligibility.eco4["eligible"],
"eco4_eligible_future_message": eligibility.eco4["message"],
"tenure": eligibility.tenure,
"heating_description": eligibility.epc["mainheat-description"],
}
@ -549,6 +553,8 @@ def get_ha_32data(ha_data, cleaned, cleaning_data, created_at):
"message": None,
"gbis_eligible_future": eligibility.gbis["eligible"],
"gbis_eligible_future_message": eligibility.gbis["message"],
"eco4_eligible_future": eligibility.eco4["eligible"],
"eco4_eligible_future_message": eligibility.eco4["message"],
"tenure": eligibility.tenure,
"heating_description": eligibility.epc["mainheat-description"],
}
@ -613,6 +619,10 @@ def get_ha_15data(ha_data, cleaned, cleaning_data, created_at):
"walls": None,
"date_epc": None,
"message": "No EPC found",
"eco4_eligible_future": None,
"eco4_eligible_future_message": None,
"tenure": None,
"heating_description": None,
}
)
continue
@ -641,6 +651,11 @@ def get_ha_15data(ha_data, cleaned, cleaning_data, created_at):
eligibility.check_gbis_warmfront()
eligibility.check_eco4_warmfront()
# If the house is not identified, we do a full gbis and eco4 check
# TODO: Add in ECO4 check
eligibility.check_gbis()
eligibility.check_eco4()
if eligibility.eco4_warmfront["eligible"]:
scoring_dictionary = prepare_model_data_row(
property_id=house["row_id"],
@ -661,6 +676,12 @@ def get_ha_15data(ha_data, cleaned, cleaning_data, created_at):
"walls": eligibility.walls["clean_description"],
"date_epc": eligibility.epc["lodgement-date"],
"message": "eco4 conditional on post sap",
"gbis_eligible_future": eligibility.gbis["eligible"],
"gbis_eligible_future_message": eligibility.gbis["message"],
"eco4_eligible_future": eligibility.eco4["eligible"],
"eco4_eligible_future_message": eligibility.eco4["message"],
"tenure": eligibility.tenure,
"heating_description": eligibility.epc["mainheat-description"],
}
)
continue
@ -676,7 +697,13 @@ def get_ha_15data(ha_data, cleaned, cleaning_data, created_at):
"roof": eligibility.roof["clean_description"],
"walls": eligibility.walls["clean_description"],
"date_epc": eligibility.epc["lodgement-date"],
"message": None
"message": None,
"gbis_eligible_future": eligibility.gbis["eligible"],
"gbis_eligible_future_message": eligibility.gbis["message"],
"eco4_eligible_future": eligibility.eco4["eligible"],
"eco4_eligible_future_message": eligibility.eco4["message"],
"tenure": eligibility.tenure,
"heating_description": eligibility.epc["mainheat-description"],
}
)
@ -794,6 +821,7 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
"""
results_df = pd.DataFrame(results)
results_df["tenure"] = results_df["tenure"].fillna("Unknown - probably new build")
# What proportio
warmfront_identified = results_df[
@ -857,21 +885,32 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
new_possibilities = results_df[
(~results_df["warmfront_identified"]) &
(results_df["gbis_eligible"] | results_df["eco4_eligible"])
(results_df["gbis_eligible"] | results_df["eco4_eligible"]) &
(results_df["tenure"] == "Rented (social)")
].copy()
new_possibilities_full_gbis = results_df[
future_possibilities_eco = results_df[
(~results_df["warmfront_identified"]) &
(results_df["gbis_eligible_future"] == True)
(results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
].copy()
future_possibilities_eco["eco4_eligible_future_message"].value_counts()
future_possibilities_gbis = results_df[
(~results_df["warmfront_identified"]) &
(results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
].copy()
future_possibilities_gbis["gbis_eligible_future_message"].value_counts()
# We deem that any EPC produced in the last 3 years gives us good confidence
cutoff_date = datetime.now() - timedelta(days=3 * 365)
new_possibilities["high_confidence"] = pd.to_datetime(new_possibilities["date_epc"]) >= cutoff_date
new_possibilities_full_gbis["high_confidence"] = pd.to_datetime(
new_possibilities_full_gbis["date_epc"]) >= cutoff_date
future_possibilities_eco["high_confidence"] = pd.to_datetime(
future_possibilities_eco["date_epc"]) >= cutoff_date
# We do a quick check on properties that didn't have a house number:
no_house_numbers_ha32 = ha32[ha32["row_id"].isin(no_house_numbers)]["identified"].sum()
@ -881,8 +920,10 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
new = {
"n_new_possibilities": new_possibilities.shape[0],
"new_possibilities_confidence": new_possibilities["high_confidence"].value_counts(),
"new_possibilities_full_gbis": new_possibilities_full_gbis.shape[0],
"new_possibilities_full_gbis_confidence": new_possibilities_full_gbis["high_confidence"].value_counts()
"future_possibilities_gbis": future_possibilities_gbis.shape[0],
"future_possibilities_gbis_confidence": future_possibilities_gbis["high_confidence"].value_counts(),
"future_possibilities_eco": future_possibilities_eco.shape[0],
"future_possibilities_eco_confidence": future_possibilities_eco["high_confidence"].value_counts(),
}
return success_rate, new
@ -898,6 +939,8 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
For HA32, most of these (if not all) properties were identified under gbis
"""
results_df["tenure"] = results_df["tenure"].fillna("Unknown - probably new build")
# What proportio
warmfront_identified = results_df[
results_df["warmfront_identified"]
@ -930,6 +973,7 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
]
sap_low_enough["walls"].value_counts()
z = ha15[ha15["row_id"].isin(sap_too_high["row_id"].values)]
investigate_1 = ha15[ha15["row_id"].isin(sap_too_high["row_id"])][
["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
@ -943,7 +987,29 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
new_possibilities = results_df[
(~results_df["warmfront_identified"]) &
((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True))
((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) &
(results_df["tenure"] == "Rented (social)")
].copy()
# These are future possibilities
new_possibilities_eco = results_df[
(~results_df["warmfront_identified"]) &
(results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
].copy()
rids = new_possibilities_eco[new_possibilities_eco["sap"] == 54]["row_id"]
z = ha15[ha15["row_id"].isin(rids)]
new_possibilities_gbis = results_df[
(~results_df["warmfront_identified"]) &
(results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
].copy()
not_new = results_df[
(~results_df["warmfront_identified"]) &
(results_df["gbis_eligible_future"] != True) & (results_df["eco4_eligible_future"] != True) & (
~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
].copy()
# We deem that any EPC produced in the last 3 years gives us good confidence for GBIS
@ -1026,6 +1092,13 @@ def app():
# f
# )
# with open("ha32.pickle", "rb") as f:
# ha32_dict = pickle.load(f)
# ha32_results = ha32_dict["ha32_results"]
# ha32_scoring_data = ha32_dict["ha32_scoring_data"]
# ha32_no_house_numbers = ha32_dict["ha32_no_house_numbers"]
ha32_success_rate, ha32_new_possibilities = analyse_ha_32_results(
results=ha32_results, ha32=ha32, no_house_numbers=ha32_no_house_numbers
)
@ -1044,5 +1117,7 @@ def app():
# )
ha15_success_rate, ha15_new, ha15_identified_results, ha15_missed_results = analyse_ha_15_results(
ha15_results_df, ha15, ha15_no_house_numbers
results_df=ha15_results_df,
ha15=ha15,
no_house_numbers=ha15_no_house_numbers
)