diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 4a503a08..50ed0772 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1065,21 +1065,8 @@ async def model_engine(body: PlanTriggerRequest): ) continue - fixed_gain = optimiser_functions.calculate_fixed_gain( - property_required_measures, recommendations, p, needs_ventilation - ) - gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages) - - # We insert the innovation uplift - measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) - - for group in measures_to_optimise_with_uplift: - for r in group: - (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], - r["uplift_project_score"]) = (0, 0, 0, 0) - already_installed_measures = [] - for measures in measures_to_optimise_with_uplift: + for measures in measures_to_optimise: for m in measures: # A) We're going to make the already installed measures default # B) We need to SAP points for all already installed measures to avoid double counting @@ -1096,6 +1083,22 @@ async def model_engine(body: PlanTriggerRequest): default_already_installed = keep_max_sap_per_measure_type(already_installed_measures) already_installed_sap = float(sum(d["sap_points"] for d in default_already_installed)) + fixed_gain = optimiser_functions.calculate_fixed_gain( + property_required_measures, recommendations, p, needs_ventilation + ) + gain = optimiser_functions.calculate_gain( + body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages, + already_installed_gain=already_installed_sap + ) + + # We insert the innovation uplift + measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) + + for group in measures_to_optimise_with_uplift: + for r in group: + (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"]) = (0, 0, 0, 0) + # Remove them from the optimisation pool finalised_measures_to_optimise 
= [] for m in measures_to_optimise_with_uplift: @@ -1115,7 +1118,7 @@ async def model_engine(body: PlanTriggerRequest): p=p, input_measures=input_measures, budget=body.budget, - target_gain=gain - already_installed_sap, + target_gain=gain, enforce_heat_pump_insulation=True, enforce_fabric_first=body.enforce_fabric_first, already_installed_sap=already_installed_sap, # To be passed to output diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py index b2dfb01e..3f56d82d 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py @@ -1,34 +1,5 @@ import pandas as pd -# import pandas as pd -# -# sal = pd.read_excel( -# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " -# "Project/data_validation/to_standardise_uprns - Standardised.xlsx", -# sheet_name="Standardised Asset List" -# ) -# -# # Quick breadown of missingness -# missing = sal[ -# pd.isnull(sal["estimated"]) | (sal["estimated"] == True) | pd.isnull(sal["epc_os_uprn"]) -# ] -# -# fetched = sal[(sal["estimated"] == False) | ~pd.isnull(sal["epc_os_uprn"])].copy() -# fetched = fetched[ -# ["landlord_property_id", "domna_address_1", "domna_postcode", "domna_full_address", "epc_address1", -# "epc_postcode", "epc_address", "landlord_property_type", "epc_property_type"] -# ] -# -# known_issues = [ -# -# ] -# -# # Missed postcodes -# missed_postcode_agg = missing.groupby("domna_postcode").size().reset_index(name="count") -# missed_postcode_agg = missed_postcode_agg.sort_values("count", ascending=False) -# -# multi_missed_postcode = missed_postcode_agg[missed_postcode_agg["count"] > 1] - ### Prepare sustainability_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " 
diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index d22d0f9e..67ff2c85 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -5,7 +5,7 @@ from backend.app.db.connection import db_read_session from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from backend.app.db.models.recommendations import Plan -PORTFOLIO_ID = 433 +PORTFOLIO_ID = 435 with db_read_session() as session: # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 @@ -49,12 +49,13 @@ sal = sal.drop_duplicates(subset=['epc_os_uprn']) estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy() SCENARIOS = [ - 871, # EPC C - fabric first, no solid floor, ashp 3.0 - 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - 862, # EPC B - No solid floor, ASHP COP 3.0 - 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP - 859, # EPC C - no solid floor, ashp 3.0 - 885, # EPC B - fabric first, no solid floor, ashp 3.0 + # 871, # EPC C - fabric first, no solid floor, ashp 3.0 + # 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + # 862, # EPC B - No solid floor, ASHP COP 3.0 + # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP + # 859, # EPC C - no solid floor, ashp 3.0 + # 885, # EPC B - fabric first, no solid floor, ashp 3.0 + 908, 909, 910 ] # Get all plans, associated to these properties - the property IDs are in estimated_epc_ids diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py index 5200c34d..cd7fba63 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -231,6 +231,261 @@ 
properties_data, plans_data, recommendations_data = get_data( ) recommendations_df = pd.DataFrame(recommendations_data) +properties_df = pd.DataFrame(properties_data) solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() + +# Check tenures +initial_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Properties" +) +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +sustainability_sample = sustainability_data[ + sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values) +] + +sustainability_sample = sustainability_sample.merge( + initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset") +) + +block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) +block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False) + +initial_asset_data.columns +initial_asset_data["LeaseType"].value_counts() + +# sustainability_sample["Tenure Group"].value_counts() +# Tenure Group +# General Needs 57787 +# Home Ownership 25471 +# Care & Supported Housing 4239 +# Rental 2677 +# Other 188 + +df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index() +df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False) + +tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index() +tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", 
index=False) + +initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts() + +sample_data = initial_asset_data[ + ~initial_asset_data["Ownership Type"].isin( + [ + # Commercial # Everything is resi - based on the Residential Indicator variable - all are true + # Freeholder + "FREEHOLDER", # 19517 properties + # HOMEBUY / EQUITY LOAN + "Rent to Homebuy", # 1 property + # Leaseholder + "LEASEHOLD 100%", # 8455 properties + "Owned and Managed - 999 year lease", # 2076 properties + "Managed but not Owned-Private Lease", # 159 properties + "Owned and managed LEASEHOLD", # 26 properties + # Outright Sale - can't find anything matching + # SHARED EQUITY + "Shared Ownership", # 4065 properties + "Shared Ownership Owned Not Managed", # 23 properties + # Extra categories which seem sensible to exclude + "NOT MANAGED AND NOT OWNED" + ] + ) +] + +sample_data["Ownership Type"].value_counts() + +sample_data = initial_asset_data[ + initial_asset_data["Ownership Type"].isin( + [ + "Owned and Managed", + "Owned and Managed - 999 year lease", + "Owned and managed LEASEHOLD", + "LEASEHOLD 100%", + "DATALOAD DEFAULT" + ] + ) +] +dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)] +dropped["Ownership Type"].value_counts() + +for value in [ + # Commercial # Everything is resi, so should be fine. 
No matches + # Freeholder + "FREEHOLDER", # 19517 properties + # HOMEBUY / EQUITY LOAN + "Rent to Homebuy", # 1 property + # Leaseholder + "LEASEHOLD 100%", # 8455 properties + "Owned and Managed - 999 year lease", # 2076 properties + "Managed but not Owned-Private Lease", # 159 properties + "Owned and managed LEASEHOLD", # 26 properties + # Outright Sale - can't find anything matching + # SHARED EQUITY + "Shared Ownership", # 4065 properties + "Shared Ownership Owned Not Managed", # 23 properties +]: + print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0]) + +house_types = [ + "HOUSE", + "BUNGALOW", + "MAISONETTE", + "DUPLEX", +] + +guaranteed_control = [ + "Owned and Managed", + "Owned and Managed - 999 year lease", + "Owned and managed LEASEHOLD", + "LEASEHOLD 100%", + "DATALOAD DEFAULT", +] + +sample_data = initial_asset_data[ + ( + initial_asset_data["Ownership Type"].isin(guaranteed_control) + ) + | + ( + (initial_asset_data["Ownership Type"] == "FREEHOLDER") + & + (initial_asset_data["Property Type"].isin(house_types)) + ) + ] + +fabric_retrofit_sample = initial_asset_data[ + initial_asset_data["Ownership Type"].isin( + [ + "Owned and Managed", + "FREEHOLDER", + "DATALOAD DEFAULT", + ] + ) +] + +initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() +initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() + +initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts() +z = initial_asset_data[ + ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types) + ] + +block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) +zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"] + +potential_sample = initial_asset_data[ + ~pd.isnull(initial_asset_data["BlockCode"]) +] + +compare = potential_sample["Property 
Type"].value_counts(normalize=True).to_frame().reset_index().merge( + initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(), + left_on="Property Type", + right_on="Property Type", + suffixes=("_on_block_codes", "_overall") +) + +# Comparison of smaller sample vs overall +new_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Properties" +) + +new_sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Sustainability" +) + +sap_bands = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " + "08012026.xlsx", +) + +combined = new_asset_data.merge( + new_sustainability_data, + left_on="UPRN", + right_on="Org Ref", + suffixes=("_asset", "_sustainability") +).merge( + sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef" +) +reduced_sample = combined[ + ~combined["AH Tenure"].isin( + ["Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership"] + ) +].copy() + +# property types +property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge( + combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(), + left_on="Property Type", + right_on="Property Type", + suffixes=("_reduced_sample", "_overall") +) + +# lodged ratings +lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts( + normalize=True).to_frame().reset_index().merge( + combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(), + 
left_on="Lodged EPC Band", + right_on="Lodged EPC Band", + suffixes=("_reduced_sample", "_overall") +) + +# modelled ratings +modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts( + normalize=True).to_frame().reset_index().merge( + combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(), + left_on="SAP Band", + right_on="SAP Band", + suffixes=("_reduced_sample", "_overall") +) + +# Testing measures +m1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - 20250113 final.xlsx" +) +m2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx" +) + +compare = m1.merge( + m2, + left_on="uprn", + right_on="uprn", + suffixes=("_ewi_iwi", "_no_ewi_iwi") +) + +# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario +only_no_ewi_iwi = compare[ + (compare["total_retrofit_cost_ewi_iwi"] == 0) & + (compare["total_retrofit_cost_no_ewi_iwi"] != 0) + ] + +(m1["total_retrofit_cost"] > 0).sum() +(m2["total_retrofit_cost"] > 0).sum() + +with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0] + +z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])] diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py b/etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py new file mode 100644 index 00000000..cbc52447 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py @@ -0,0 +1,115 @@ +import pandas as pd + +initial_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Properties" +) + +sustainability_data = 
pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +asset_data_v2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Properties" +) + +desired_ownerships = asset_data_v2[ + ~asset_data_v2["AH Tenure"].isin( + {"Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership"} + ) +] + +desired_ownerships["Ownership Type"].value_counts() + +removed_ownerships = initial_asset_data[ + ~initial_asset_data["UPRN"].isin(desired_ownerships["UPRN"].values) +]["Ownership Type"].value_counts() + +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " + "ownership filtered sal.xlsx", + sheet_name="Standardised Asset List" +) + +# What did we include, that we shouldn't have? 
+should_have_been_dropped = sal[ + ~sal["landlord_property_id"].isin(desired_ownerships["UPRN"].values) +] + +needs_to_be_added = desired_ownerships[ + ~desired_ownerships["UPRN"].isin(sal["landlord_property_id"].values) +] + +# Merge on ownership types +sal = sal.merge( + initial_asset_data[["UPRN", "Ownership Type"]], + left_on="domna_property_id", + right_on="UPRN", +) + +# Remove the irrelevant ownership types +sal = sal[ + ~sal["Ownership Type"].isin( + [ + # Commercial # Everything is resi - based on the Residential Indicator variable - all are true + # Freeholder + "FREEHOLDER", # 19517 properties + # HOMEBUY / EQUITY LOAN + "Rent to Homebuy", # 1 property + # Leaseholder + "LEASEHOLD 100%", # 8455 properties + "Owned and Managed - 999 year lease", # 2076 properties + "Managed but not Owned-Private Lease", # 159 properties + "Owned and managed LEASEHOLD", # 26 properties + # Outright Sale - can't find anything matching + # SHARED EQUITY + "Shared Ownership", # 4065 properties + "Shared Ownership Owned Not Managed", # 23 properties + # Extra categories which seem sensible to exclude + "NOT MANAGED AND NOT OWNED" + ] + ) +] + +sal["landlord_property_id"] = sal["domna_property_id"].copy() + +# Store this SAL in three batches +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " + "ownership filtered sal.xlsx" +) +with pd.ExcelWriter(filename) as writer: + sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Store the three sections + sal[0:20000].to_excel(writer, sheet_name="Batch 1", index=False) + sal[20000:40000].to_excel(writer, sheet_name="Batch 2", index=False) + sal[40000:].to_excel(writer, sheet_name="Batch 3", index=False) + +# Test reading back in and assembling +# b1 = pd.read_excel( +# filename, +# sheet_name="Batch 1" +# ) +# b2 = pd.read_excel( +# filename, +# sheet_name="Batch 2" +# ) +# b3 = pd.read_excel( +# filename, +# sheet_name="Batch 3" +# ) 
+# assembled_sal = pd.concat([b1, b2, b3]) +# # Make sure we have the right # of UPRNs +# assert assembled_sal["epc_os_uprn"].nunique() == sal["epc_os_uprn"].nunique() diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py new file mode 100644 index 00000000..a18dc315 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py @@ -0,0 +1,293 @@ +# ------ Pull in the full SAL sample ------ +import pandas as pd + +full_sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " + "SAL/Depracated/20260107 corrected batch 6 sal.xlsx", + sheet_name="Standardised Asset List" +) + +# ------Pull in the reduced sample ------ +# This has a slightly incorrect mix of ownership types. Some properties will need to be dropped and others, added +reduced_sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " + "ownership filtered sal.xlsx", + sheet_name="Standardised Asset List" +) + +# ------ Pull in the confirmed ownership column from Peabody ------ +new_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Properties" +) + +correct_sample = new_asset_data[ + ~new_asset_data["AH Tenure"].isin( + ["Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership"] + ) +].copy() + +# ------- Stuff to add ------- +# These are properties that need to be added to the reduced sample, from the SAL +stuff_to_add = correct_sample[ + ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values) +]["UPRN"].values + +sal_to_add = full_sal[ + 
full_sal["domna_property_id"].isin(stuff_to_add) +].copy() + +# ------- Stuff to remove ------- +stuff_to_remove = reduced_sal[ + ~reduced_sal["landlord_property_id"].isin(correct_sample["UPRN"].values) +]["landlord_property_id"].values + +to_delete = reduced_sal[ + reduced_sal["landlord_property_id"].isin(stuff_to_remove) +].copy() + +# ------- Create the correctly formatted SAL, with an individual batch for properties we need to add ------- + +# This is what is correct, from the reduced sample, after removing the incorrect ownership types +reduced_sal_final = reduced_sal[ + ~reduced_sal["landlord_property_id"].isin(stuff_to_remove) +].copy() + +sal_to_add["landlord_property_id"] = sal_to_add["domna_property_id"].copy() + +full_sal = pd.concat( + [reduced_sal_final, sal_to_add], +) + +# filename = ( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - " +# "final asset list.xlsx" +# ) +# with pd.ExcelWriter(filename) as writer: +# full_sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) +# # Store the three sections +# reduced_sal_final[0:25000].to_excel(writer, sheet_name="Batch 1 - was correct", index=False) +# reduced_sal_final[25000:].to_excel(writer, sheet_name="Batch 2 - was correct", index=False) +# sal_to_add.to_excel(writer, sheet_name="Batch 3 - needs adding", index=False) + +# We now prepare the process of getting the associated plans +# We have the properties we need to delete. 
We can get their associated plans for all scenario IDs +scenario_ids = [908, 909, 910] + +import pandas as pd +from sqlalchemy.orm import Session +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session, db_read_session +from sqlalchemy import select, func +from sqlalchemy.orm import Session +from backend.app.db.models.recommendations import Plan + +uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist() + +# PORTFOLIO_ID = 435 + +# SCENARIO_ID_WITH_PLANS_TO_DELETE = 910 + + +# Get the property IDs for these UPRNs +# def get_property_ids_for_uprns(session: Session, uprns: list[int], portfolio_id) -> list[int]: +# return [ +# property_id +# for (property_id,) in +# session.query(PropertyModel.id) +# .filter( +# PropertyModel.uprn.in_(uprns), +# PropertyModel.portfolio_id == portfolio_id +# ) +# .all() +# ] +# +# +# with db_read_session() as session: +# property_ids_to_delete = get_property_ids_for_uprns( +# session, uprns_to_be_deleted, portfolio_id=PORTFOLIO_ID +# ) +# +# +# def count_plans_for_scenario(session: Session, scenario_id: int, portfolio_id, property_ids) -> int: +# return session.execute( +# select(func.count()) +# .select_from(Plan) +# .where( +# Plan.scenario_id == scenario_id, +# Plan.portfolio_id == portfolio_id, +# Plan.property_id.in_(property_ids) +# ) +# ).scalar_one() +# +# +# with db_session() as session: +# n_plans = count_plans_for_scenario( +# session, +# scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE, +# portfolio_id=PORTFOLIO_ID, +# property_ids=property_ids_to_delete +# ) +# +# +# def get_plan_ids_for_scenario( +# session: Session, scenario_id: int, portfolio_id, property_ids +# ) -> list[int]: +# result = session.execute( +# select(Plan.id, Plan.property_id) +# .where( +# Plan.scenario_id == scenario_id, +# Plan.portfolio_id == portfolio_id, +# Plan.property_id.in_(property_ids) +# ) +# ) +# return [{"plan_id": row.id, "property_id": row.property_id} for row in result] +# +# +# 
with db_session() as session: +# plan_ids_to_property = get_plan_ids_for_scenario( +# session, +# scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE, +# portfolio_id=PORTFOLIO_ID, +# property_ids=property_ids_to_delete +# ) +# +# df = pd.DataFrame(plan_ids_to_property) +# df[df["property_id"].duplicated()].shape +# +# plan_ids = [row["plan_id"] for row in plan_ids_to_property] +# +# +# def chunked(iterable, size): +# for i in range(0, len(iterable), size): +# yield iterable[i:i + size] +# +# +# from sqlalchemy import text +# from sqlalchemy.orm import Session +# +# +# def delete_plan_batch(session: Session, plan_ids: list[int]): +# if not plan_ids: +# return +# +# session.execute(text("SET LOCAL lock_timeout = '5s'")) +# +# params = {"plan_ids": plan_ids} +# +# # ---------------------------- +# # recommendation_materials +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM recommendation_materials rm +# USING plan_recommendations pr +# WHERE rm.recommendation_id = pr.recommendation_id +# AND pr.plan_id = ANY(:plan_ids) +# """), +# params, +# ) +# +# # ---------------------------- +# # plan_recommendations +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM plan_recommendations +# WHERE plan_id = ANY(:plan_ids) +# """), +# params, +# ) +# +# # ---------------------------- +# # recommendations (only those used by these plans) +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM recommendation r +# WHERE r.id IN ( +# SELECT DISTINCT recommendation_id +# FROM plan_recommendations +# WHERE plan_id = ANY(:plan_ids) +# ) +# """), +# params, +# ) +# +# # ---------------------------- +# # plans LAST +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM plan +# WHERE id = ANY(:plan_ids) +# """), +# params, +# ) +# +# +# batch_size = 25 +# total = (len(plan_ids) + batch_size - 1) // batch_size +# +# for i, batch in enumerate(chunked(plan_ids, batch_size), start=1): +# 
print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)") +# +# with db_session() as session: +# delete_plan_batch(session, batch) +# +# print(f"Batch {i} committed") +# +# # Now, we delete the associated properties in batch and associated objects. It should +# # largely be property, property details +# property_ids_to_delete +# +# from sqlalchemy import text +# from sqlalchemy.orm import Session +# +# +# def move_properties_between_portfolios( +# session: Session, +# property_ids: list[int], +# from_portfolio_id: int, +# to_portfolio_id: int, +# ): +# if not property_ids: +# return 0 +# +# result = session.execute( +# text(""" +# UPDATE property +# SET portfolio_id = :to_portfolio_id +# WHERE portfolio_id = :from_portfolio_id +# AND id = ANY(:property_ids) +# """), +# { +# "property_ids": property_ids, +# "from_portfolio_id": from_portfolio_id, +# "to_portfolio_id": to_portfolio_id, +# }, +# ) +# +# return result.rowcount +# +# +# # Moved? +# # 573476, 586011 +# +# property_ids_to_delete2 = [x for x in property_ids_to_delete if x not in [573476, 586011]] +# +# with db_session() as session: +# n_moved = move_properties_between_portfolios( +# session, +# property_ids=property_ids_to_delete2, +# from_portfolio_id=PORTFOLIO_ID, +# to_portfolio_id=32, # Archive portfolio +# ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py new file mode 100644 index 00000000..4bd11a1b --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py @@ -0,0 +1,80 @@ +# 1) Need to get all already installed measures +# 2) get the unique uprns for these properties +# 3) Create a re-fresh SAL for these properties +# 4) re-trigger EPC C w/o EWI/IWI + the EPC B scenario + +from backend.app.db.models.recommendations import InstalledMeasure +from backend.app.db.connection import db_session +from 
etl.customers.cambridge.surveys import current_epc + +# Get all installed measures from the installedMeasure table +with db_session() as session: + # We need installed measures, where the measure type is cavity wall insulation + installed_measures = session.query(InstalledMeasure).filter( + InstalledMeasure.measure_type.in_(["cavity_wall_insulation"]) + ).all() + # Get the uprns + installed_uprns = [x.uprn for x in installed_measures] + +installed_uprns = list(set(installed_uprns)) + +# We then create a portfolio of properties we need to re-run +import pandas as pd + +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - " + "final asset list.xlsx", + sheet_name="Standardised Asset List" +) + +needing_retry = sal[sal["epc_os_uprn"].isin(installed_uprns)] + +# Store +needing_retry.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " + "SAL/properties_needing_retry_20260115 - cavity wall insulation.xlsx", + sheet_name="Standardised Asset List", + index=False +) + +#### Testing +with_ewi = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - 20250113 final.xlsx" +) +without_ewi = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx" +) + +comparison = with_ewi.merge( + without_ewi, + left_on="uprn", + right_on="uprn", + suffixes=("_with_ewi", "_without_ewi") +) + +with_ewi = comparison[comparison["total_retrofit_cost_with_ewi"] > 0] +with_ewi["current_epc_rating_with_ewi"].value_counts() +with_ewi["current_epc_rating_with_ewi"].value_counts() + +without_ewi = comparison[comparison["total_retrofit_cost_without_ewi"] > 0] +with_ewi = comparison[comparison["total_retrofit_cost_with_ewi"] > 0] + 
+with_ewi[with_ewi["current_epc_rating_with_ewi"] == "Epc.C"]["uprn"] + +to_fix = with_ewi[with_ewi["current_epc_rating_with_ewi"] == "Epc.C"] +to_fix = to_fix[["uprn", "address_with_ewi", "postcode_with_ewi", "property_type_with_ewi"]].rename( + columns={ + "address_with_ewi": "address", + "postcode_with_ewi": "postcode", + "property_type_with_ewi": "property_type" + } +).merge( + sal[["epc_os_uprn", "landlord_built_form"]], + left_on="uprn", + right_on="epc_os_uprn", + how="left" +).drop(columns=["epc_os_uprn"]) + +to_fix = to_fix.to_dict("records") diff --git a/infrastructure/terraform/dev.tfvars b/infrastructure/terraform/dev.tfvars index dc84a01f..92b7e158 100644 --- a/infrastructure/terraform/dev.tfvars +++ b/infrastructure/terraform/dev.tfvars @@ -9,7 +9,7 @@ api_url_prefix = "api" # Database allocated_storage = 20 -instance_class = "db.t3.micro" +instance_class = "db.t4g.medium" database_name = "DevAssessmentModelDB" # S3 diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 2466ea4e..ab13134d 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -86,6 +86,18 @@ class Recommendations: inclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.inclusions] exclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.exclusions] + + # if we have already installed measures, we need to include them so they get factored into the baseline + # this is something we'll likely need to remove + if self.property_instance.already_installed: + # We make sure that any already installed measures are included + for rec in self.property_instance.already_installed: + if rec not in inclusions_full: + inclusions_full.append(rec) + + # We remove them from the exclusions if they are there + exclusions_full = [e for e in exclusions_full if e not in self.property_instance.already_installed] + # We need to unlist any lists, but we should check if they're lists first 
inclusions_full = [ item for sublist in inclusions_full for item in (sublist if isinstance(sublist, list) else [sublist]) diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index a8aa0ca3..ed2f50e2 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -39,7 +39,7 @@ class VentilationRecommendations(Definitions): parts = self.mechanical_ventilation_materials.copy() - already_installed = "cavity_wall_insulation" in self.property.already_installed + already_installed = "mechanical_ventilation" in self.property.already_installed # TODO: We now have multiple ventilation options - we default to selecting the cheapest option part = min(parts, key=lambda x: x['total_cost']) diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 0eec35dc..a4543dbf 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -202,8 +202,13 @@ def calculate_fixed_gain(property_required_measures, recommendations, p, needs_v return fixed_gain -def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float, - eco_packages: None | dict = None) -> float | None: +def calculate_gain( + body: PlanTriggerRequest, + p: Property, + fixed_gain: float, + eco_packages: None | dict = None, + already_installed_gain: float = 0, +) -> float | None: """ Calculates the target gain value for optimisation based on the goal. @@ -221,6 +226,7 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float, fixed_gain : float Total fixed gain from required measures (returned by calculate_fixed_gain). eco_packages : dict, optional + already_installed_gain : float, optional Returns ------- @@ -228,13 +234,17 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float, Required SAP gain for EPC, or None for non-EPC goals. 
""" if body.goal == "Increasing EPC": - current_sap = int(p.data["current-energy-efficiency"]) + current_sap = int(p.data["current-energy-efficiency"]) + already_installed_gain target_sap = ( eco_packages.get(p.id)[1] if eco_packages.get(p.id)[1] is not None else epc_to_sap_lower_bound(body.goal_value) ) + if target_sap <= current_sap: + # We've already met or exceeded the target EPC + return 0 + gain = CostOptimiser.calculate_sap_gain_with_slack( target_sap - current_sap ) - fixed_gain diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index 865e3398..ea0b5d94 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -85,6 +85,22 @@ class TestCalculateGain: gain = optimiser_functions.calculate_gain(body, prop, fixed_gain=0) assert gain is None + def test_returns_zero_for_already_installed_getting_to_target(self): + body = SimpleNamespace(goal="Increasing EPC", goal_value="C") + p = SimpleNamespace(data={"current-energy-efficiency": "67"}, id=1) + fixed_gain = 0 + eco_packages = {1: (None, None, None, [])} + already_installed_sap = 2 + gain = optimiser_functions.calculate_gain( + body=body, + p=p, + fixed_gain=fixed_gain, + eco_packages=eco_packages, + already_installed_gain=already_installed_sap + ) + + assert gain == 0 + def test_calculates_gain_for_epc(self, monkeypatch): # patch cost optimiser calculation monkeypatch.setattr(optimiser_functions, "epc_to_sap_lower_bound", lambda goal_value: 69) diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 36efb603..f12eb85d 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -14,14 +14,16 @@ from collections import defaultdict # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 434 # Peabody +PORTFOLIO_ID = 435 # Peabody SCENARIOS = [ - 904, - 905 + 908, + 909, + 910, ] scenario_names = { - 904: "EPC C - 
no solid floor, ashp 3.0", - 905: "EPC B - no solid floor, ashp 3.0", + 908: "EPC C - no solid floor, ashp 3.0", + 909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0", + 910: "EPC B - no solid floor, no EWI, ashp 3.0" } @@ -231,7 +233,7 @@ for scenario_id in SCENARIOS: # Create excel to store to filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - f"Project/Final SAL/{scenario_names[scenario_id]} - corrected.xlsx") + f"Project/Final SAL/{scenario_names[scenario_id]} - 20250113 final.xlsx") with pd.ExcelWriter(filename) as writer: df.to_excel(writer, sheet_name="properties", index=False)