tidying up optimisation process

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-19 08:41:44 +00:00
parent 5d5001fec3
commit d65c99f62a

View file

@ -2297,39 +2297,9 @@ def propsed_wave_3_sample():
if pd.isnull(results[c]).sum():
raise Exception("Something went wrong")
# home = results[results["Confidence Tier"] == "5 - EPC C or above"].sample(1)
# region = home["Postal Region"].values[0]
# Create a pivot table for counts of Confidence Tier by Postal Region
geographic_summary = results.pivot_table(
index='Postal Region',
columns='Confidence Tier',
aggfunc='size',
fill_value=0
).reset_index()
# We create the gain and loss columns
# Gain is the sum of these columns:
# '1 - Archetype surveyed',
# '1 - property was surveyed',
# '2 - same archetype',
# '3 - similar property, weighted on distance'
gain_columns = sorted([x for x in results["Confidence Tier"].unique() if "1 - " in x or "2 - " in x or "3 - " in x])
loss_columns = sorted([x for x in results["Confidence Tier"].unique() if "4 - " in x or "5 - " in x])
geographic_summary["Gain"] = geographic_summary[gain_columns].sum(axis=1)
geographic_summary["Loss"] = geographic_summary[loss_columns].sum(axis=1)
print(geographic_summary.sum())
geographic_summary = geographic_summary.sort_values("Loss", ascending=True)
geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum()
geographic_summary[geographic_summary["Loss Cumulative Sum"] <= 250]["Gain"].sum()
loss = geographic_summary["Loss"].values
gain = geographic_summary["Gain"].values
def optimise(gain, loss, max_loss=250):
# Define the coefficients for the objective function (negative because we maximize Gain)
@ -2352,76 +2322,51 @@ def propsed_wave_3_sample():
return selected_rows, optimal_gain
selected_rows, _ = optimise(gain, loss, 250)
# Select the rows that are selected
geographic_summary["Selected"] = selected_rows == 1
geographic_summary[geographic_summary["Selected"]].sum()
region_totals = geographic_summary[
geographic_summary["Selected"]
][["Gain", "Loss"]].sum()
# We now see if there are any postcodes that have no loss that can be added
unselected_regions = geographic_summary[~geographic_summary["Selected"]]["Postal Region"].values
# TODO: Try on street
postcode_summary = results.pivot_table(
street_summary = results.pivot_table(
index='Street and Region',
columns='Confidence Tier',
aggfunc='size',
fill_value=0
).reset_index()
# postcode_summary = postcode_summary.merge(
# results[["Postcode", "Postal Region"]].drop_duplicates(),
# how="left", on="Postcode"
# )
#
postcode_summary_unselected_regions = postcode_summary.copy()
# postcode_summary_unselected_regions = postcode_summary[
# postcode_summary["Postcode"].str.split(" ").str[0].isin(unselected_regions)
# ].copy()
postcode_summary_unselected_regions["Gain"] = postcode_summary_unselected_regions[gain_columns].sum(axis=1)
postcode_summary_unselected_regions["Loss"] = postcode_summary_unselected_regions[loss_columns].sum(axis=1)
street_summary["Gain"] = street_summary[gain_columns].sum(axis=1)
street_summary["Loss"] = street_summary[loss_columns].sum(axis=1)
# Remaining loss allowed
# remaining_loss_constraint = 230 - region_totals["Loss"]
remaining_loss_constraint = 220
postcode_selected_rows, _ = optimise(
gain=postcode_summary_unselected_regions["Gain"].values,
loss=postcode_summary_unselected_regions["Loss"].values,
max_loss=int(remaining_loss_constraint)
print(street_summary.sum())
selected_rows, _ = optimise(
gain=street_summary["Gain"].values,
loss=street_summary["Loss"].values,
max_loss=250
)
postcode_summary_unselected_regions["Selected"] = postcode_selected_rows == 1
postcode_summary_unselected_regions[postcode_summary_unselected_regions["Selected"]][["Gain", "Loss"]].sum()
street_summary["Selected"] = selected_rows == 1
print(street_summary[street_summary["Selected"]][["Gain", "Loss"]].sum())
postcode_optimised_additional_properties = postcode_summary_unselected_regions[
postcode_summary_unselected_regions["Selected"]
selected_streets = street_summary[
street_summary["Selected"]
]
postcode_totals = postcode_optimised_additional_properties[["Gain", "Loss"]].sum()
totals = selected_streets[["Gain", "Loss"]].sum()
bid_size = postcode_totals.sum()
bid_size = totals.sum()
print("Bid Size:", bid_size)
total_epc_d_or_below = postcode_totals["Gain"]
total_epc_d_or_below = totals["Gain"]
print("Total EPC D or below:", total_epc_d_or_below)
total_epc_c = postcode_totals["Loss"]
total_epc_c = totals["Loss"]
print("Total EPC C or above:", total_epc_c)
# Total needing a survey
total_needing_survey = postcode_optimised_additional_properties[
total_needing_survey = selected_streets[
"4 - no similar property, needs survey to confirm"
].sum()
print("Total needing survey:", total_needing_survey)
# Look for postcodes on the unselected streets that have no loss
unselected_streets = postcode_summary_unselected_regions[
~postcode_summary_unselected_regions["Selected"]
unselected_streets = street_summary[
~street_summary["Selected"]
]["Street and Region"].values
postcode_summary2 = results[
postcode_summary = results[
results["Street and Region"].isin(unselected_streets)
].pivot_table(
index='Postcode',
@ -2430,14 +2375,12 @@ def propsed_wave_3_sample():
fill_value=0
).reset_index()
postcode_summary2["Gain"] = postcode_summary2[gain_columns].sum(axis=1)
postcode_summary2["Loss"] = postcode_summary2[loss_columns].sum(axis=1)
postcode_summary["Gain"] = postcode_summary[gain_columns].sum(axis=1)
postcode_summary["Loss"] = postcode_summary[loss_columns].sum(axis=1)
no_loss_postcodes = postcode_summary2[postcode_summary2["Loss"] == 0].sort_values("Gain", ascending=False)
no_loss_postcodes = postcode_summary[postcode_summary["Loss"] == 0].sort_values("Gain", ascending=False)
total_bid_size = bid_size + no_loss_postcodes["Gain"].sum()
print(total_bid_size)
z = results[results["Confidence Tier"] == "5 - EPC C or above"]
# if __name__ == "__main__":
# main()