mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
messing around with street match
This commit is contained in:
parent
1b38832e27
commit
67f97feb18
1 changed file with 74 additions and 31 deletions
|
|
@ -1637,7 +1637,7 @@ def propsed_wave_3_sample():
|
|||
|
||||
# TODO: We drop 302 properties that are not priority postcodes - confirm w/ Stonewater and 7 properties missing
|
||||
# UPRN
|
||||
asset_list = asset_list[~asset_list["Archetype ID"].isin(["NOT PRIORITY POSTCODE", "MISSING UPRN"])]
|
||||
asset_list = asset_list[~asset_list["Archetype ID"].isin(["MISSING UPRN"])]
|
||||
# Clean address ids
|
||||
asset_list = asset_list[~pd.isnull(asset_list["Address ID"])]
|
||||
asset_list = asset_list[asset_list["Address ID"] != "Address ID"]
|
||||
|
|
@ -1645,12 +1645,13 @@ def propsed_wave_3_sample():
|
|||
|
||||
# Create the postal region, taking the first part of the postcode
|
||||
asset_list["Postal Region"] = asset_list["Postcode"].str.split(" ").str[0]
|
||||
asset_list["Street and Region"] = asset_list["Street name"] + " " + asset_list["Postal Region"]
|
||||
unique_postal_regions = asset_list["Postal Region"].unique()
|
||||
|
||||
# Keep just the columns we need
|
||||
asset_list = asset_list[
|
||||
["UPRN", "Address ID", "Archetype ID", "Postal Region", "Postcode", "Property Type", "Wall Type", "Roof Type",
|
||||
"Heating"]
|
||||
["UPRN", "Address ID", "Archetype ID", "Postal Region", "Postcode", "Street and Region",
|
||||
"Property Type", "Wall Type", "Roof Type", "Heating"]
|
||||
]
|
||||
|
||||
survey_results = pd.read_excel(
|
||||
|
|
@ -1853,7 +1854,6 @@ def propsed_wave_3_sample():
|
|||
suffixes=("", "_method2")
|
||||
)
|
||||
else:
|
||||
|
||||
region_assets = region_assets.merge(
|
||||
archetype_surveyed,
|
||||
on="Archetype ID",
|
||||
|
|
@ -1903,20 +1903,20 @@ def propsed_wave_3_sample():
|
|||
|
||||
surveyed = survey_results_with_original_features[
|
||||
(
|
||||
survey_results_with_original_features["Property Type"].str.split(":").str[0] ==
|
||||
property["Property Type"].split(":")[0]
|
||||
survey_results_with_original_features["Property Type"] ==
|
||||
property["Property Type"]
|
||||
) &
|
||||
(
|
||||
survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
|
||||
property["Wall Type"].split(":")[0]
|
||||
survey_results_with_original_features["Wall Type"] ==
|
||||
property["Wall Type"]
|
||||
) &
|
||||
(
|
||||
survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
|
||||
property["Roof Type"].split(":")[0]
|
||||
survey_results_with_original_features["Roof Type"] ==
|
||||
property["Roof Type"]
|
||||
) &
|
||||
(
|
||||
survey_results_with_original_features["Heating"].str.split(":").str[0] ==
|
||||
property["Heating"].split(":")[0]
|
||||
survey_results_with_original_features["Heating"] ==
|
||||
property["Heating"]
|
||||
)
|
||||
].copy()
|
||||
|
||||
|
|
@ -1962,7 +1962,10 @@ def propsed_wave_3_sample():
|
|||
if "Electric" in property["Heating"]:
|
||||
# Take other electric heating systems
|
||||
surveyed = surveyed[surveyed["Heating"].str.contains("Electric")]
|
||||
elif property["Heating"] == "Community Heating Systems: Community boilers only (RdSAP)":
|
||||
elif property["Heating"] in [
|
||||
"Community Heating Systems: Community boilers only (RdSAP)",
|
||||
"Community Heating Systems: Community CHP and boilers (RdSAP)"
|
||||
]:
|
||||
# Take other community heating systems
|
||||
surveyed = surveyed[surveyed["Heating"].str.contains("Community")]
|
||||
elif property["Heating"] == 'Heat Pump: (from database)':
|
||||
|
|
@ -2001,8 +2004,8 @@ def propsed_wave_3_sample():
|
|||
if any(surveyed["Postal Region"] == property["Postal Region"]):
|
||||
surveyed = surveyed[surveyed["Postal Region"] == property["Postal Region"]]
|
||||
|
||||
# Take the 5 nearest
|
||||
surveyed = surveyed.head(5)
|
||||
# Take the 3 nearest
|
||||
surveyed = surveyed.head(3)
|
||||
|
||||
# # We allow a max distance of 10km
|
||||
# surveyed = surveyed[surveyed["distance_meters"] < 10000]
|
||||
|
|
@ -2176,6 +2179,9 @@ def propsed_wave_3_sample():
|
|||
|
||||
results = pd.concat(results)
|
||||
|
||||
# home = results[results["Confidence Tier"] == "5 - EPC C or above"].sample(1)
|
||||
# region = home["Postal Region"].values[0]
|
||||
|
||||
# Create a pivot table for counts of Confidence Tier by Postal Region
|
||||
geographic_summary = results.pivot_table(
|
||||
index='Postal Region',
|
||||
|
|
@ -2192,7 +2198,9 @@ def propsed_wave_3_sample():
|
|||
# '3 - similar property, weighted on distance'
|
||||
|
||||
gain_columns = [
|
||||
'1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype',
|
||||
'1 - Archetype surveyed',
|
||||
'1 - property was surveyed',
|
||||
'2 - same archetype',
|
||||
'3 - similar property, weighted on distance'
|
||||
]
|
||||
#
|
||||
|
|
@ -2200,8 +2208,11 @@ def propsed_wave_3_sample():
|
|||
# '4 - no similar property, needs survey to confirm',
|
||||
# '5 - EPC C or above', '5 - property was surveyed'
|
||||
|
||||
loss_columns = ['4 - no similar property, needs survey to confirm', '5 - EPC C or above',
|
||||
'5 - property was surveyed']
|
||||
loss_columns = [
|
||||
'4 - no similar property, needs survey to confirm',
|
||||
'5 - EPC C or above',
|
||||
'5 - property was surveyed'
|
||||
]
|
||||
geographic_summary["Gain"] = geographic_summary[gain_columns].sum(axis=1)
|
||||
geographic_summary["Loss"] = geographic_summary[loss_columns].sum(axis=1)
|
||||
|
||||
|
|
@ -2249,26 +2260,30 @@ def propsed_wave_3_sample():
|
|||
# We now see if there are any postcodes that have no loss that can be added
|
||||
unselected_regions = geographic_summary[~geographic_summary["Selected"]]["Postal Region"].values
|
||||
|
||||
# TODO: Try on street
|
||||
|
||||
postcode_summary = results.pivot_table(
|
||||
index='Postcode',
|
||||
index='Street and Region',
|
||||
columns='Confidence Tier',
|
||||
aggfunc='size',
|
||||
fill_value=0
|
||||
).reset_index()
|
||||
postcode_summary = postcode_summary.merge(
|
||||
results[["Postcode", "Postal Region"]].drop_duplicates(),
|
||||
how="left", on="Postcode"
|
||||
)
|
||||
|
||||
postcode_summary_unselected_regions = postcode_summary[
|
||||
postcode_summary["Postcode"].str.split(" ").str[0].isin(unselected_regions)
|
||||
].copy()
|
||||
# postcode_summary = postcode_summary.merge(
|
||||
# results[["Postcode", "Postal Region"]].drop_duplicates(),
|
||||
# how="left", on="Postcode"
|
||||
# )
|
||||
#
|
||||
postcode_summary_unselected_regions = postcode_summary.copy()
|
||||
# postcode_summary_unselected_regions = postcode_summary[
|
||||
# postcode_summary["Postcode"].str.split(" ").str[0].isin(unselected_regions)
|
||||
# ].copy()
|
||||
|
||||
postcode_summary_unselected_regions["Gain"] = postcode_summary_unselected_regions[gain_columns].sum(axis=1)
|
||||
postcode_summary_unselected_regions["Loss"] = postcode_summary_unselected_regions[loss_columns].sum(axis=1)
|
||||
|
||||
# Remaining loss allowed
|
||||
remaining_loss_constraint = 250 - region_totals["Loss"]
|
||||
# remaining_loss_constraint = 230 - region_totals["Loss"]
|
||||
remaining_loss_constraint = 250
|
||||
postcode_selected_rows, _ = optimise(
|
||||
gain=postcode_summary_unselected_regions["Gain"].values,
|
||||
loss=postcode_summary_unselected_regions["Loss"].values,
|
||||
|
|
@ -2284,12 +2299,40 @@ def propsed_wave_3_sample():
|
|||
|
||||
postcode_totals = postcode_optimised_additional_properties[["Gain", "Loss"]].sum()
|
||||
|
||||
bid_size = region_totals.sum() + postcode_totals.sum()
|
||||
bid_size = postcode_totals.sum()
|
||||
print("Bid Size:", bid_size)
|
||||
total_epc_d_or_below = region_totals["Gain"] + postcode_totals["Gain"]
|
||||
total_epc_d_or_below = postcode_totals["Gain"]
|
||||
print("Total EPC D or below:", total_epc_d_or_below)
|
||||
total_epc_c = region_totals["Loss"] + postcode_totals["Loss"]
|
||||
total_epc_c = postcode_totals["Loss"]
|
||||
print("Total EPC C or above:", total_epc_c)
|
||||
# Total needing a survey
|
||||
total_needing_survey = postcode_optimised_additional_properties[
|
||||
"4 - no similar property, needs survey to confirm"
|
||||
].sum()
|
||||
print("Total needing survey:", total_needing_survey)
|
||||
|
||||
# Look for postcodes that have no loss
|
||||
unselected_streets = postcode_summary_unselected_regions[
|
||||
~postcode_summary_unselected_regions["Selected"]
|
||||
]["Street and Region"].values
|
||||
|
||||
postcode_summary2 = results[
|
||||
results["Street and Region"].isin(unselected_streets)
|
||||
].pivot_table(
|
||||
index='Postcode',
|
||||
columns='Confidence Tier',
|
||||
aggfunc='size',
|
||||
fill_value=0
|
||||
).reset_index()
|
||||
|
||||
postcode_summary2["Gain"] = postcode_summary2[gain_columns].sum(axis=1)
|
||||
postcode_summary2["Loss"] = postcode_summary2[loss_columns].sum(axis=1)
|
||||
|
||||
no_loss_postcodes = postcode_summary2[postcode_summary2["Loss"] == 0].sort_values("Gain", ascending=False)
|
||||
total_bid_size = bid_size + no_loss_postcodes["Gain"].sum()
|
||||
print(total_bid_size)
|
||||
|
||||
z = results[results["Confidence Tier"] == "5 - EPC C or above"]
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue