2044 properties added

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-17 20:13:19 +00:00
parent a630fe05c4
commit 1b38832e27

View file

@ -1938,6 +1938,27 @@ def propsed_wave_3_sample():
)
].copy()
if surveyed.empty:
if property["Property Type"].split(":")[0] in ["House", "Bungalow", "Maisonette"]:
filter_property_types = ["House", "Bungalow", ]
else:
filter_property_types = ["Flat"]
surveyed = survey_results_with_original_features[
(
survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
filter_property_types
)
) &
(
survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
property["Wall Type"].split(":")[0]
) &
(
survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
property["Roof Type"].split(":")[0]
)
].copy()
if "Electric" in property["Heating"]:
# Take other electric heating systems
surveyed = surveyed[surveyed["Heating"].str.contains("Electric")]
@ -1950,6 +1971,9 @@ def propsed_wave_3_sample():
elif property["Heating"] == "Solid fuel room heaters: Open fire in grate":
# Take other properties with room heaters
surveyed = surveyed[surveyed["Heating"].str.contains("room heaters")]
elif "Boiler" in property["Heating"]:
# Take other properties with boilers
surveyed = surveyed[surveyed["Heating"].str.contains("Boiler")]
else:
raise Exception("Fix me")
@ -1972,17 +1996,29 @@ def propsed_wave_3_sample():
# If any surveyed property shares the property's postcode, keep only those matches
if any(surveyed["Postcode"] == property["Postcode"]):
surveyed_similar = surveyed[surveyed["Postcode"] == property["Postcode"]]
surveyed = surveyed[surveyed["Postcode"] == property["Postcode"]]
if any(surveyed["Postal Region"] == property["Postal Region"]):
surveyed_similar = surveyed[surveyed["Postal Region"] == property["Postal Region"]]
surveyed = surveyed[surveyed["Postal Region"] == property["Postal Region"]]
# Take the 5 nearest
surveyed_similar = surveyed_similar.head(5)
surveyed = surveyed.head(5)
# # We allow a max distance of 10km
# surveyed = surveyed[surveyed["distance_meters"] < 10000]
# if surveyed.empty:
# final_missed_matches.append(
# {
# "Address ID": a_id,
# "Confidence Tier": "4 - no similar property, needs survey to confirm",
# "Current EPC Band": "Needs Survey"
# }
# )
# continue
# perform a weighted mean of SAP rating - the closer the better
expected_sap = np.average(
surveyed_similar["Current SAP Rating"], weights=1 / (surveyed_similar["distance_meters"] + 1)
surveyed["Current SAP Rating"], weights=1 / (surveyed["distance_meters"] + 1)
)
expected_epc = sap_to_epc(expected_sap)
@ -2153,23 +2189,21 @@ def propsed_wave_3_sample():
# '1 - Archetype surveyed',
# '1 - property was surveyed',
# '2 - same archetype',
# '3 - similar property',
# '3 - similar property, all areas searched',
# '3 - similar property, relaxed conditions'
# '3 - similar property, weighted on distance'
gain_columns = [
'1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype',
'3 - similar property, weighted on distance'
]
#
# Loss is the sum of these columns:
# '4 - no similar property, needs survey to confirm',
# '5 - EPC C or above', '5 - property was surveyed'
geographic_summary["Gain"] = geographic_summary[
[
'1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype', '3 - similar property',
'3 - similar property, all areas searched', '3 - similar property, relaxed conditions'
]
].sum(axis=1)
geographic_summary["Loss"] = geographic_summary[
['5 - EPC C or above', '5 - property was surveyed']
].sum(axis=1)
loss_columns = ['4 - no similar property, needs survey to confirm', '5 - EPC C or above',
'5 - property was surveyed']
geographic_summary["Gain"] = geographic_summary[gain_columns].sum(axis=1)
geographic_summary["Loss"] = geographic_summary[loss_columns].sum(axis=1)
print(geographic_summary.sum())
@ -2180,30 +2214,82 @@ def propsed_wave_3_sample():
loss = geographic_summary["Loss"].values
gain = geographic_summary["Gain"].values
# Define the coefficients for the objective function (negative because we maximize Gain)
c = -gain
def optimise(gain, loss, max_loss=250):
    """Choose the rows that maximise total gain within a total-loss budget.

    Each row is either selected (1) or not selected (0). The objective is the
    summed ``gain`` of the selected rows and the single constraint is that
    their summed ``loss`` is at most ``max_loss``.

    The decision variables are declared integer to the HiGHS solver. Solving
    the continuous relaxation and rounding afterwards is not equivalent: a
    row chosen at e.g. 0.6 rounds up to 1 and pushes the selection over
    ``max_loss``, and the solver's objective value then belongs to the
    fractional solution rather than to the rows actually selected.

    Args:
        gain: 1-D array-like of per-row gain.
        loss: 1-D array-like of per-row loss, same length as ``gain``.
        max_loss: Maximum total loss allowed across the selected rows.

    Returns:
        A tuple ``(selected_rows, optimal_gain)``. ``selected_rows`` is an
        integer array of 0/1 flags aligned with ``gain``; ``optimal_gain`` is
        the total gain of exactly those selected rows.

    Raises:
        Exception: If the solver does not report success, e.g. because no
            selection satisfies the loss budget.
    """
    gain = np.asarray(gain, dtype=float)
    loss = np.asarray(loss, dtype=float)

    # Nothing to choose from: return an empty selection instead of handing
    # the solver a problem with zero variables.
    if gain.size == 0:
        return np.zeros(0, dtype=int), 0.0

    # Define the coefficients for the objective function (negative because
    # linprog minimises and we want to maximise Gain)
    c = -gain

    # Define constraints
    A = [loss]  # Only 1 constraint for now, total Loss
    b = [max_loss]  # Maximum total Loss allowed

    # Bounds for each variable (select or not select each row, 0 <= x <= 1)
    bounds = [(0, 1)] * gain.size

    # Combined with the (0, 1) bounds, integrality=1 makes every variable
    # binary, so the loss constraint holds for the selection we return.
    integrality = np.ones(gain.size, dtype=int)

    # Solve the problem using linprog with HiGHS solver
    result = linprog(
        c, A_ub=A, b_ub=b, bounds=bounds, method='highs', integrality=integrality
    )
    if not result.success:
        raise Exception("Optimization failed")

    # The solver returns floats that are integral only up to tolerance;
    # rounding here just cleans that up.
    selected_rows = result.x.round().astype(int)

    # Report the gain of the rows actually selected.
    optimal_gain = float(gain @ selected_rows)

    return selected_rows, optimal_gain
selected_rows, _ = optimise(gain, loss, 250)
# Select the rows that are selected
geographic_summary["Selected"] = selected_rows == 1
geographic_summary[geographic_summary["Selected"]].sum()
bid_size = geographic_summary[geographic_summary["Selected"]][["Gain", "Loss"]].sum().sum()
region_totals = geographic_summary[
geographic_summary["Selected"]
][["Gain", "Loss"]].sum()
# We now see if there are any postcodes that have no loss that can be added
unselected_regions = geographic_summary[~geographic_summary["Selected"]]["Postal Region"].values
postcode_summary = results.pivot_table(
index='Postcode',
columns='Confidence Tier',
aggfunc='size',
fill_value=0
).reset_index()
postcode_summary = postcode_summary.merge(
results[["Postcode", "Postal Region"]].drop_duplicates(),
how="left", on="Postcode"
)
postcode_summary_unselected_regions = postcode_summary[
postcode_summary["Postcode"].str.split(" ").str[0].isin(unselected_regions)
].copy()
postcode_summary_unselected_regions["Gain"] = postcode_summary_unselected_regions[gain_columns].sum(axis=1)
postcode_summary_unselected_regions["Loss"] = postcode_summary_unselected_regions[loss_columns].sum(axis=1)
# Remaining loss allowed
remaining_loss_constraint = 250 - region_totals["Loss"]
postcode_selected_rows, _ = optimise(
gain=postcode_summary_unselected_regions["Gain"].values,
loss=postcode_summary_unselected_regions["Loss"].values,
max_loss=int(remaining_loss_constraint)
)
postcode_summary_unselected_regions["Selected"] = postcode_selected_rows == 1
postcode_summary_unselected_regions[postcode_summary_unselected_regions["Selected"]][["Gain", "Loss"]].sum()
postcode_optimised_additional_properties = postcode_summary_unselected_regions[
postcode_summary_unselected_regions["Selected"]
]
postcode_totals = postcode_optimised_additional_properties[["Gain", "Loss"]].sum()
bid_size = region_totals.sum() + postcode_totals.sum()
print("Bid Size:", bid_size)
total_epc_d_or_below = region_totals["Gain"] + postcode_totals["Gain"]
print("Total EPC D or below:", total_epc_d_or_below)
total_epc_c = region_totals["Loss"] + postcode_totals["Loss"]
print("Total EPC C or above:", total_epc_c)
# if __name__ == "__main__":
# main()