messing around with street match

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-17 22:33:42 +00:00
parent 1b38832e27
commit 67f97feb18

View file

@ -1637,7 +1637,7 @@ def propsed_wave_3_sample():
# TODO: We drop 302 properties that are not priority postcodes - confirm w/ Stonewater and 7 properties missing
# UPRN
asset_list = asset_list[~asset_list["Archetype ID"].isin(["NOT PRIORITY POSTCODE", "MISSING UPRN"])]
asset_list = asset_list[~asset_list["Archetype ID"].isin(["MISSING UPRN"])]
# Clean address ids
asset_list = asset_list[~pd.isnull(asset_list["Address ID"])]
asset_list = asset_list[asset_list["Address ID"] != "Address ID"]
@ -1645,12 +1645,13 @@ def propsed_wave_3_sample():
# Create the postal region, taking the first part of the postcode
asset_list["Postal Region"] = asset_list["Postcode"].str.split(" ").str[0]
asset_list["Street and Region"] = asset_list["Street name"] + " " + asset_list["Postal Region"]
unique_postal_regions = asset_list["Postal Region"].unique()
# Keep just the columns we need
asset_list = asset_list[
["UPRN", "Address ID", "Archetype ID", "Postal Region", "Postcode", "Property Type", "Wall Type", "Roof Type",
"Heating"]
["UPRN", "Address ID", "Archetype ID", "Postal Region", "Postcode", "Street and Region",
"Property Type", "Wall Type", "Roof Type", "Heating"]
]
survey_results = pd.read_excel(
@ -1853,7 +1854,6 @@ def propsed_wave_3_sample():
suffixes=("", "_method2")
)
else:
region_assets = region_assets.merge(
archetype_surveyed,
on="Archetype ID",
@ -1903,20 +1903,20 @@ def propsed_wave_3_sample():
surveyed = survey_results_with_original_features[
(
survey_results_with_original_features["Property Type"].str.split(":").str[0] ==
property["Property Type"].split(":")[0]
survey_results_with_original_features["Property Type"] ==
property["Property Type"]
) &
(
survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
property["Wall Type"].split(":")[0]
survey_results_with_original_features["Wall Type"] ==
property["Wall Type"]
) &
(
survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
property["Roof Type"].split(":")[0]
survey_results_with_original_features["Roof Type"] ==
property["Roof Type"]
) &
(
survey_results_with_original_features["Heating"].str.split(":").str[0] ==
property["Heating"].split(":")[0]
survey_results_with_original_features["Heating"] ==
property["Heating"]
)
].copy()
@ -1962,7 +1962,10 @@ def propsed_wave_3_sample():
if "Electric" in property["Heating"]:
# Take other electric heating systems
surveyed = surveyed[surveyed["Heating"].str.contains("Electric")]
elif property["Heating"] == "Community Heating Systems: Community boilers only (RdSAP)":
elif property["Heating"] in [
"Community Heating Systems: Community boilers only (RdSAP)",
"Community Heating Systems: Community CHP and boilers (RdSAP)"
]:
# Take other community heating systems
surveyed = surveyed[surveyed["Heating"].str.contains("Community")]
elif property["Heating"] == 'Heat Pump: (from database)':
@ -2001,8 +2004,8 @@ def propsed_wave_3_sample():
if any(surveyed["Postal Region"] == property["Postal Region"]):
surveyed = surveyed[surveyed["Postal Region"] == property["Postal Region"]]
# Take the 5 nearest
surveyed = surveyed.head(5)
# Take the 3 nearest
surveyed = surveyed.head(3)
# # We allow a max distance of 10km
# surveyed = surveyed[surveyed["distance_meters"] < 10000]
@ -2176,6 +2179,9 @@ def propsed_wave_3_sample():
results = pd.concat(results)
# home = results[results["Confidence Tier"] == "5 - EPC C or above"].sample(1)
# region = home["Postal Region"].values[0]
# Create a pivot table for counts of Confidence Tier by Postal Region
geographic_summary = results.pivot_table(
index='Postal Region',
@ -2192,7 +2198,9 @@ def propsed_wave_3_sample():
# '3 - similar property, weighted on distance'
gain_columns = [
'1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype',
'1 - Archetype surveyed',
'1 - property was surveyed',
'2 - same archetype',
'3 - similar property, weighted on distance'
]
#
@ -2200,8 +2208,11 @@ def propsed_wave_3_sample():
# '4 - no similar property, needs survey to confirm',
# '5 - EPC C or above', '5 - property was surveyed'
loss_columns = ['4 - no similar property, needs survey to confirm', '5 - EPC C or above',
'5 - property was surveyed']
loss_columns = [
'4 - no similar property, needs survey to confirm',
'5 - EPC C or above',
'5 - property was surveyed'
]
geographic_summary["Gain"] = geographic_summary[gain_columns].sum(axis=1)
geographic_summary["Loss"] = geographic_summary[loss_columns].sum(axis=1)
@ -2249,26 +2260,30 @@ def propsed_wave_3_sample():
# We now see if there are any postcodes that have no loss that can be added
unselected_regions = geographic_summary[~geographic_summary["Selected"]]["Postal Region"].values
# TODO: Try on street
postcode_summary = results.pivot_table(
index='Postcode',
index='Street and Region',
columns='Confidence Tier',
aggfunc='size',
fill_value=0
).reset_index()
postcode_summary = postcode_summary.merge(
results[["Postcode", "Postal Region"]].drop_duplicates(),
how="left", on="Postcode"
)
postcode_summary_unselected_regions = postcode_summary[
postcode_summary["Postcode"].str.split(" ").str[0].isin(unselected_regions)
].copy()
# postcode_summary = postcode_summary.merge(
# results[["Postcode", "Postal Region"]].drop_duplicates(),
# how="left", on="Postcode"
# )
#
postcode_summary_unselected_regions = postcode_summary.copy()
# postcode_summary_unselected_regions = postcode_summary[
# postcode_summary["Postcode"].str.split(" ").str[0].isin(unselected_regions)
# ].copy()
postcode_summary_unselected_regions["Gain"] = postcode_summary_unselected_regions[gain_columns].sum(axis=1)
postcode_summary_unselected_regions["Loss"] = postcode_summary_unselected_regions[loss_columns].sum(axis=1)
# Remaining loss allowed
remaining_loss_constraint = 250 - region_totals["Loss"]
# remaining_loss_constraint = 230 - region_totals["Loss"]
remaining_loss_constraint = 250
postcode_selected_rows, _ = optimise(
gain=postcode_summary_unselected_regions["Gain"].values,
loss=postcode_summary_unselected_regions["Loss"].values,
@ -2284,12 +2299,40 @@ def propsed_wave_3_sample():
postcode_totals = postcode_optimised_additional_properties[["Gain", "Loss"]].sum()
bid_size = region_totals.sum() + postcode_totals.sum()
bid_size = postcode_totals.sum()
print("Bid Size:", bid_size)
total_epc_d_or_below = region_totals["Gain"] + postcode_totals["Gain"]
total_epc_d_or_below = postcode_totals["Gain"]
print("Total EPC D or below:", total_epc_d_or_below)
total_epc_c = region_totals["Loss"] + postcode_totals["Loss"]
total_epc_c = postcode_totals["Loss"]
print("Total EPC C or above:", total_epc_c)
# Total needing a survey
total_needing_survey = postcode_optimised_additional_properties[
"4 - no similar property, needs survey to confirm"
].sum()
print("Total needing survey:", total_needing_survey)
# Look for postcodes that have no loss
unselected_streets = postcode_summary_unselected_regions[
~postcode_summary_unselected_regions["Selected"]
]["Street and Region"].values
postcode_summary2 = results[
results["Street and Region"].isin(unselected_streets)
].pivot_table(
index='Postcode',
columns='Confidence Tier',
aggfunc='size',
fill_value=0
).reset_index()
postcode_summary2["Gain"] = postcode_summary2[gain_columns].sum(axis=1)
postcode_summary2["Loss"] = postcode_summary2[loss_columns].sum(axis=1)
no_loss_postcodes = postcode_summary2[postcode_summary2["Loss"] == 0].sort_values("Gain", ascending=False)
total_bid_size = bid_size + no_loss_postcodes["Gain"].sum()
print(total_bid_size)
z = results[results["Confidence Tier"] == "5 - EPC C or above"]
# if __name__ == "__main__":
# main()