implemented linear programming to find maximal bid size

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-17 18:05:05 +00:00
parent a01ff1d8de
commit 7d63c16404
2 changed files with 64 additions and 8 deletions

View file

@ -3,9 +3,9 @@ import PyPDF2
import re
import pandas as pd
import numpy as np
from docutils.utils.math.tex2mathml_extern import blahtexml
from tqdm import tqdm
from collections import Counter
from scipy.optimize import linprog
CUSTOMER_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
SURVEY_FOLDERS = os.path.join(CUSTOMER_FOLDER_PATH, "StonewaterSurveys_{i}")
@ -1843,13 +1843,38 @@ def propsed_wave_3_sample():
]
if surveyed_similar.empty:
final_missed_matches.append(
{
"Address ID": a_id,
"Confidence Tier": "4 - no similar property, needs survey to confirm",
"Current EPC Band": "Unknown"
}
)
# We get an average based on the postcode
surveyed_similar = survey_results_with_original_features[
(survey_results_with_original_features["Postal Region"] == property["Postal Region"]) &
(survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
filter_property_types
))
]
if surveyed_similar.empty:
final_missed_matches.append(
{
"Address ID": a_id,
"Confidence Tier": "4 - no similar property, needs survey to confirm",
"Current EPC Band": "Unknown"
}
)
else:
expected_sap = surveyed_similar["Current SAP Rating"].mean()
expected_epc = sap_to_epc(expected_sap)
if expected_epc in ["C", "B", "A"]:
tier = "5 - EPC C or above"
else:
tier = "3 - similar property, relaxed conditions"
final_missed_matches.append(
{
"Address ID": a_id,
"Confidence Tier": tier,
"Current EPC Band": expected_epc
}
)
continue
# We take an average
expected_sap = surveyed_similar["Current SAP Rating"].mean()
@ -1922,5 +1947,35 @@ def propsed_wave_3_sample():
geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum()
geographic_summary[geographic_summary["Loss Cumulative Sum"] <= 250]["Gain"].sum()
geographic_summary[["Loss", "Gain"]].head()
# Pick the subset of rows in geographic_summary that maximises total "Gain"
# while keeping total "Loss" within a fixed cap. Every row is either taken
# whole or not at all, so this is a 0/1 knapsack problem, not a continuous LP.
loss = geographic_summary["Loss"].values
gain = geographic_summary["Gain"].values
max_total_loss = 250  # Maximum total Loss allowed

# linprog minimises its objective, so negate Gain in order to maximise it
c = -gain
# Single inequality constraint: sum(loss * x) <= max_total_loss
A = [loss]
b = [max_total_loss]
# One decision variable per row, restricted to the range [0, 1]
bounds = [(0, 1) for _ in gain]

# integrality=1 forces every variable to be an integer, which together with
# the [0, 1] bounds makes each one binary. Without it HiGHS solves the
# continuous relaxation: rounding a fractional x up to 1 can then push the
# selection over the Loss cap, and -result.fun overstates the reachable Gain.
# NOTE: the `integrality` argument requires SciPy >= 1.9.
result = linprog(
    c, A_ub=A, b_ub=b, bounds=bounds, method='highs', integrality=1
)
if not result.success:
    raise RuntimeError(f"Optimization failed: {result.message}")

# The solver returns floats (e.g. 0.9999999), so snap them to exact 0 / 1
selected_rows = result.x.round().astype(int)

# Report the Gain of the rows that were actually selected
optimal_gain = gain[selected_rows == 1].sum()
print(optimal_gain)

# Flag the selected rows on the summary table
geographic_summary["Selected"] = selected_rows == 1

# Bid size is the combined Gain and Loss over the selected rows
bid_size = geographic_summary.loc[
    geographic_summary["Selected"], ["Gain", "Loss"]
].sum().sum()
print("Bid Size:", bid_size)
# if __name__ == "__main__":
# main()

View file

@ -7,4 +7,5 @@ epc-api-python==1.0.2
usaddress==0.5.11
fuzzywuzzy==0.18.0
python-dotenv
scipy