Tweak the postal-region matching algorithm - may need to switch to matching on postcode or street instead

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-17 16:45:10 +00:00
parent 7d209d5d8e
commit a01ff1d8de

View file

@ -1803,22 +1803,43 @@ def propsed_wave_3_sample():
for a_id in missed_addressids:
property = asset_list[asset_list["Address ID"] == a_id].squeeze()
# TODO: This is quite strict for the moment - we might want to relax this by creating reduced versions
# of the wall, roof and heating features, splitting them on the colons and taking the first part
if property["Property Type"].split(":")[0] in ["House", "Bungalow"]:
filter_property_types = ["House", "Bungalow"]
else:
filter_property_types = ["Flat"]
surveyed_similar = survey_results_with_original_features[
(survey_results_with_original_features["Postcode"] == property["Postcode"]) &
(survey_results_with_original_features["Property Type"] == property["Property Type"]) &
(survey_results_with_original_features["Wall Type"] == property["Wall Type"]) &
(survey_results_with_original_features["Roof Type"] == property["Roof Type"]) &
(survey_results_with_original_features["Heating"] == property["Heating"])
(
survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
filter_property_types
)
) &
(
survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
property["Wall Type"].split(":")[0]
) &
(
survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
property["Roof Type"].split(":")[0]
) &
(
survey_results_with_original_features["Heating"].str.split(":").str[0] ==
property["Heating"].split(":")[0]
)
]
if surveyed_similar.empty:
surveyed_similar = survey_results_with_original_features[
(survey_results_with_original_features["Postal Region"] == property["Postal Region"]) &
(survey_results_with_original_features["Property Type"] == property["Property Type"]) &
(survey_results_with_original_features["Wall Type"] == property["Wall Type"]) &
(survey_results_with_original_features["Roof Type"] == property["Roof Type"]) &
(survey_results_with_original_features["Heating"] == property["Heating"])
(survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
filter_property_types
)) &
(survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
property["Wall Type"].split(":")[0]) &
(survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
property["Roof Type"].split(":")[0]) &
(survey_results_with_original_features["Heating"].str.split(":").str[0] ==
property["Heating"].split(":")[0])
]
if surveyed_similar.empty:
@ -1842,7 +1863,7 @@ def propsed_wave_3_sample():
{
"Address ID": a_id,
"Confidence Tier": tier,
"Current EPC Band": "Unknown"
"Current EPC Band": expected_epc
}
)
@ -1899,6 +1920,7 @@ def propsed_wave_3_sample():
geographic_summary = geographic_summary.sort_values("Loss", ascending=True)
geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum()
geographic_summary[geographic_summary["Loss Cumulative Sum"] <= 250]["Gain"].sum()
# if __name__ == "__main__":
# main()