Merge branch 'main' of github.com:Hestia-Homes/Model into feature/fix_devcontainer

2026-06-08 11:17:27 +00:00 · 2026-01-16 11:28:34 +00:00 · 2026-01-16 11:28:34 +00:00 · 4d7e2ed793
commit 4d7e2ed793
parent 6f8899a0dc 941be42b83
13 changed files with 820 additions and 62 deletions
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@ -1065,21 +1065,8 @@ async def model_engine(body: PlanTriggerRequest):
                )
                continue

-            fixed_gain = optimiser_functions.calculate_fixed_gain(
-                property_required_measures, recommendations, p, needs_ventilation
-            )
-            gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages)
-
-            # We insert the innovation uplift
-            measures_to_optimise_with_uplift = deepcopy(measures_to_optimise)
-
-            for group in measures_to_optimise_with_uplift:
-                for r in group:
-                    (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
-                     r["uplift_project_score"]) = (0, 0, 0, 0)
-
            already_installed_measures = []
-            for measures in measures_to_optimise_with_uplift:
+            for measures in measures_to_optimise:
                for m in measures:
                    # A) We're going to make the already installed measures default
                    # B) We need to SAP points for all already installed measures to avoid double counting
@ -1096,6 +1083,22 @@ async def model_engine(body: PlanTriggerRequest):
            default_already_installed = keep_max_sap_per_measure_type(already_installed_measures)
            already_installed_sap = float(sum(d["sap_points"] for d in default_already_installed))

+            fixed_gain = optimiser_functions.calculate_fixed_gain(
+                property_required_measures, recommendations, p, needs_ventilation
+            )
+            gain = optimiser_functions.calculate_gain(
+                body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages,
+                already_installed_gain=already_installed_sap
+            )
+
+            # We initialise the innovation uplift fields to zero, on a copy of the measures
+            measures_to_optimise_with_uplift = deepcopy(measures_to_optimise)
+
+            for group in measures_to_optimise_with_uplift:
+                for r in group:
+                    (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
+                     r["uplift_project_score"]) = (0, 0, 0, 0)
+
            # Remove them from the optimisation pool
            finalised_measures_to_optimise = []
            for m in measures_to_optimise_with_uplift:
@ -1115,7 +1118,7 @@ async def model_engine(body: PlanTriggerRequest):
                    p=p,
                    input_measures=input_measures,
                    budget=body.budget,
-                    target_gain=gain - already_installed_sap,
+                    target_gain=gain,
                    enforce_heat_pump_insulation=True,
                    enforce_fabric_first=body.enforce_fabric_first,
                    already_installed_sap=already_installed_sap,  # To be passed to output
--- a/Project/c_finalised_modelling_data.py
+++ b/Project/c_finalised_modelling_data.py
@ -1,34 +1,5 @@
 import pandas as pd

-# import pandas as pd
-#
-# sal = pd.read_excel(
-#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-#     "Project/data_validation/to_standardise_uprns - Standardised.xlsx",
-#     sheet_name="Standardised Asset List"
-# )
-#
-# # Quick breadown of missingness
-# missing = sal[
-#     pd.isnull(sal["estimated"]) | (sal["estimated"] == True) | pd.isnull(sal["epc_os_uprn"])
-#     ]
-#
-# fetched = sal[(sal["estimated"] == False) | ~pd.isnull(sal["epc_os_uprn"])].copy()
-# fetched = fetched[
-#     ["landlord_property_id", "domna_address_1", "domna_postcode", "domna_full_address", "epc_address1",
-#      "epc_postcode", "epc_address", "landlord_property_type", "epc_property_type"]
-# ]
-#
-# known_issues = [
-#
-# ]
-#
-# # Missed postcodes
-# missed_postcode_agg = missing.groupby("domna_postcode").size().reset_index(name="count")
-# missed_postcode_agg = missed_postcode_agg.sort_values("count", ascending=False)
-#
-# multi_missed_postcode = missed_postcode_agg[missed_postcode_agg["count"] > 1]
-
 ### Prepare
 sustainability_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
--- a/Project/h_reset_estimated_epcs.py
+++ b/Project/h_reset_estimated_epcs.py
@ -5,7 +5,7 @@ from backend.app.db.connection import db_read_session
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from backend.app.db.models.recommendations import Plan

-PORTFOLIO_ID = 433
+PORTFOLIO_ID = 435

 with db_read_session() as session:
    # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419
@ -49,12 +49,13 @@ sal = sal.drop_duplicates(subset=['epc_os_uprn'])
 estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy()

 SCENARIOS = [
-    871,  # EPC C - fabric first, no solid floor, ashp 3.0
-    863,  # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
-    862,  # EPC B - No solid floor, ASHP COP 3.0
-    861,  # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
-    859,  # EPC C - no solid floor, ashp 3.0
-    885,  # EPC B - fabric first, no solid floor, ashp 3.0
+    # 871,  # EPC C - fabric first, no solid floor, ashp 3.0
+    # 863,  # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
+    # 862,  # EPC B - No solid floor, ASHP COP 3.0
+    # 861,  # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
+    # 859,  # EPC C - no solid floor, ashp 3.0
+    # 885,  # EPC B - fabric first, no solid floor, ashp 3.0
+    908, 909, 910
 ]

 # Get all plans, associated to these properties - the property IDs are in estimated_epc_ids
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -231,6 +231,261 @@ properties_data, plans_data, recommendations_data = get_data(
 )

 recommendations_df = pd.DataFrame(recommendations_data)
+properties_df = pd.DataFrame(properties_data)

 solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
 average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
+
+# Check tenures
+initial_asset_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Properties"
+)
+sustainability_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Sustainability"
+)
+
+sustainability_sample = sustainability_data[
+    sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
+]
+
+sustainability_sample = sustainability_sample.merge(
+    initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
+)
+
+block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
+block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
+
+initial_asset_data.columns
+initial_asset_data["LeaseType"].value_counts()
+
+# sustainability_sample["Tenure Group"].value_counts()
+# Tenure Group
+# General Needs               57787
+# Home Ownership              25471
+# Care & Supported Housing     4239
+# Rental                       2677
+# Other                         188
+
+df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
+df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
+
+tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
+tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
+
+initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
+
+sample_data = initial_asset_data[
+    ~initial_asset_data["Ownership Type"].isin(
+        [
+            # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
+            # Freeholder
+            "FREEHOLDER",  # 19517 properties
+            # HOMEBUY / EQUITY LOAN
+            "Rent to Homebuy",  # 1 property
+            # Leaseholder
+            "LEASEHOLD 100%",  # 8455 properties
+            "Owned and Managed - 999 year lease",  # 2076 properties
+            "Managed but not Owned-Private Lease",  # 159 properties
+            "Owned and managed LEASEHOLD",  # 26 properties
+            # Outright Sale - can't find anything matching
+            # SHARED EQUITY
+            "Shared Ownership",  # 4065 properties
+            "Shared Ownership Owned Not Managed",  # 23 properties
+            # Extra categories which seem sensible to exclude
+            "NOT MANAGED AND NOT OWNED"
+        ]
+    )
+]
+
+sample_data["Ownership Type"].value_counts()
+
+sample_data = initial_asset_data[
+    initial_asset_data["Ownership Type"].isin(
+        [
+            "Owned and Managed",
+            "Owned and Managed - 999 year lease",
+            "Owned and managed LEASEHOLD",
+            "LEASEHOLD 100%",
+            "DATALOAD DEFAULT"
+        ]
+    )
+]
+dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
+dropped["Ownership Type"].value_counts()
+
+for value in [
+    # Commercial # Everything is resi, so should be fine. No matches
+    # Freeholder
+    "FREEHOLDER",  # 19517 properties
+    # HOMEBUY / EQUITY LOAN
+    "Rent to Homebuy",  # 1 property
+    # Leaseholder
+    "LEASEHOLD 100%",  # 8455 properties
+    "Owned and Managed - 999 year lease",  # 2076 properties
+    "Managed but not Owned-Private Lease",  # 159 properties
+    "Owned and managed LEASEHOLD",  # 26 properties
+    # Outright Sale - can't find anything matching
+    # SHARED EQUITY
+    "Shared Ownership",  # 4065 properties
+    "Shared Ownership Owned Not Managed",  # 23 properties
+]:
+    print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
+
+house_types = [
+    "HOUSE",
+    "BUNGALOW",
+    "MAISONETTE",
+    "DUPLEX",
+]
+
+guaranteed_control = [
+    "Owned and Managed",
+    "Owned and Managed - 999 year lease",
+    "Owned and managed LEASEHOLD",
+    "LEASEHOLD 100%",
+    "DATALOAD DEFAULT",
+]
+
+sample_data = initial_asset_data[
+    (
+        initial_asset_data["Ownership Type"].isin(guaranteed_control)
+    )
+    |
+    (
+        (initial_asset_data["Ownership Type"] == "FREEHOLDER")
+        &
+        (initial_asset_data["Property Type"].isin(house_types))
+    )
+    ]
+
+fabric_retrofit_sample = initial_asset_data[
+    initial_asset_data["Ownership Type"].isin(
+        [
+            "Owned and Managed",
+            "FREEHOLDER",
+            "DATALOAD DEFAULT",
+        ]
+    )
+]
+
+initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
+initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
+
+initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
+z = initial_asset_data[
+    ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
+    ]
+
+block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
+zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
+
+potential_sample = initial_asset_data[
+    ~pd.isnull(initial_asset_data["BlockCode"])
+]
+
+compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
+    initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
+    left_on="Property Type",
+    right_on="Property Type",
+    suffixes=("_on_block_codes", "_overall")
+)
+
+# Comparison of smaller sample vs overall
+new_asset_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
+    "- Peabody "
+    "- Data Extracts for Domna v2.xlsx",
+    sheet_name="Properties"
+)
+
+new_sustainability_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
+    "- Peabody "
+    "- Data Extracts for Domna v2.xlsx",
+    sheet_name="Sustainability"
+)
+
+sap_bands = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
+    "08012026.xlsx",
+)
+
+combined = new_asset_data.merge(
+    new_sustainability_data,
+    left_on="UPRN",
+    right_on="Org Ref",
+    suffixes=("_asset", "_sustainability")
+).merge(
+    sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
+)
+reduced_sample = combined[
+    ~combined["AH Tenure"].isin(
+        ["Commercial",
+         "Freeholder",
+         "HOMEBUY / EQUITY LOAN",
+         "Leaseholder",
+         "Outright Sale",
+         "SHARED EQUITY",
+         "Shared Ownership"]
+    )
+].copy()
+
+# property types
+property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
+    combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
+    left_on="Property Type",
+    right_on="Property Type",
+    suffixes=("_reduced_sample", "_overall")
+)
+
+# lodged ratings
+lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
+    normalize=True).to_frame().reset_index().merge(
+    combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
+    left_on="Lodged EPC Band",
+    right_on="Lodged EPC Band",
+    suffixes=("_reduced_sample", "_overall")
+)
+
+# modelled ratings
+modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
+    normalize=True).to_frame().reset_index().merge(
+    combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
+    left_on="SAP Band",
+    right_on="SAP Band",
+    suffixes=("_reduced_sample", "_overall")
+)
+
+# Testing measures
+m1 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, ashp 3.0 - 20250113 final.xlsx"
+)
+m2 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
+)
+
+compare = m1.merge(
+    m2,
+    left_on="uprn",
+    right_on="uprn",
+    suffixes=("_ewi_iwi", "_no_ewi_iwi")
+)
+
+# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
+only_no_ewi_iwi = compare[
+    (compare["total_retrofit_cost_ewi_iwi"] == 0) &
+    (compare["total_retrofit_cost_no_ewi_iwi"] != 0)
+    ]
+
+(m1["total_retrofit_cost"] > 0).sum()
+(m2["total_retrofit_cost"] > 0).sum()
+
+with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
+
+z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
--- a/Project/l_reduced_sample.py
+++ b/Project/l_reduced_sample.py
@ -0,0 +1,115 @@
+import pandas as pd
+
+initial_asset_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Properties"
+)
+
+sustainability_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Sustainability"
+)
+
+asset_data_v2 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
+    "- Peabody "
+    "- Data Extracts for Domna v2.xlsx",
+    sheet_name="Properties"
+)
+
+desired_ownerships = asset_data_v2[
+    ~asset_data_v2["AH Tenure"].isin(
+        {"Commercial",
+         "Freeholder",
+         "HOMEBUY / EQUITY LOAN",
+         "Leaseholder",
+         "Outright Sale",
+         "SHARED EQUITY",
+         "Shared Ownership"}
+    )
+]
+
+desired_ownerships["Ownership Type"].value_counts()
+
+removed_ownerships = initial_asset_data[
+    ~initial_asset_data["UPRN"].isin(desired_ownerships["UPRN"].values)
+]["Ownership Type"].value_counts()
+
+sal = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
+    "ownership filtered sal.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+# What did we include, that we shouldn't have?
+should_have_been_dropped = sal[
+    ~sal["landlord_property_id"].isin(desired_ownerships["UPRN"].values)
+]
+
+needs_to_be_added = desired_ownerships[
+    ~desired_ownerships["UPRN"].isin(sal["landlord_property_id"].values)
+]
+
+# Merge on ownership types
+sal = sal.merge(
+    initial_asset_data[["UPRN", "Ownership Type"]],
+    left_on="domna_property_id",
+    right_on="UPRN",
+)
+
+# Remove the irrelevant ownership types
+sal = sal[
+    ~sal["Ownership Type"].isin(
+        [
+            # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
+            # Freeholder
+            "FREEHOLDER",  # 19517 properties
+            # HOMEBUY / EQUITY LOAN
+            "Rent to Homebuy",  # 1 property
+            # Leaseholder
+            "LEASEHOLD 100%",  # 8455 properties
+            "Owned and Managed - 999 year lease",  # 2076 properties
+            "Managed but not Owned-Private Lease",  # 159 properties
+            "Owned and managed LEASEHOLD",  # 26 properties
+            # Outright Sale - can't find anything matching
+            # SHARED EQUITY
+            "Shared Ownership",  # 4065 properties
+            "Shared Ownership Owned Not Managed",  # 23 properties
+            # Extra categories which seem sensible to exclude
+            "NOT MANAGED AND NOT OWNED"
+        ]
+    )
+]
+
+sal["landlord_property_id"] = sal["domna_property_id"].copy()
+
+# Store this SAL in three batches
+filename = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
+    "ownership filtered sal.xlsx"
+)
+with pd.ExcelWriter(filename) as writer:
+    sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+    # Store the three sections
+    sal[0:20000].to_excel(writer, sheet_name="Batch 1", index=False)
+    sal[20000:40000].to_excel(writer, sheet_name="Batch 2", index=False)
+    sal[40000:].to_excel(writer, sheet_name="Batch 3", index=False)
+
+# Test reading back in and assembling
+# b1 = pd.read_excel(
+#     filename,
+#     sheet_name="Batch 1"
+# )
+# b2 = pd.read_excel(
+#     filename,
+#     sheet_name="Batch 2"
+# )
+# b3 = pd.read_excel(
+#     filename,
+#     sheet_name="Batch 3"
+# )
+# assembled_sal = pd.concat([b1, b2, b3])
+# # Make sure we have the right # of UPRNs
+# assert assembled_sal["epc_os_uprn"].nunique() == sal["epc_os_uprn"].nunique()
--- a/Project/m_reduced_sample_revised.py
+++ b/Project/m_reduced_sample_revised.py
@ -0,0 +1,293 @@
+# ------ Pull in the full SAL sample ------
+import pandas as pd
+
+full_sal = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
+    "SAL/Depracated/20260107 corrected batch 6 sal.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+# ------Pull in the reduced sample ------
+# This has a slightly incorrect mix of ownership types. Some properties will need to be dropped and others, added
+reduced_sal = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
+    "ownership filtered sal.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+# ------ Pull in the confirmed ownership column from Peabody ------
+new_asset_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
+    "- Peabody "
+    "- Data Extracts for Domna v2.xlsx",
+    sheet_name="Properties"
+)
+
+correct_sample = new_asset_data[
+    ~new_asset_data["AH Tenure"].isin(
+        ["Commercial",
+         "Freeholder",
+         "HOMEBUY / EQUITY LOAN",
+         "Leaseholder",
+         "Outright Sale",
+         "SHARED EQUITY",
+         "Shared Ownership"]
+    )
+].copy()
+
+# ------- Stuff to add -------
+# These are properties that need to be added to the reduced sample, from the SAL
+stuff_to_add = correct_sample[
+    ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values)
+]["UPRN"].values
+
+sal_to_add = full_sal[
+    full_sal["domna_property_id"].isin(stuff_to_add)
+].copy()
+
+# ------- Stuff to remove -------
+stuff_to_remove = reduced_sal[
+    ~reduced_sal["landlord_property_id"].isin(correct_sample["UPRN"].values)
+]["landlord_property_id"].values
+
+to_delete = reduced_sal[
+    reduced_sal["landlord_property_id"].isin(stuff_to_remove)
+].copy()
+
+# ------- Create the correctly formatted SAL, with an individual batch for properties we need to add -------
+
+# This is what is correct, from the reduced sample, after removing the incorrect ownership types
+reduced_sal_final = reduced_sal[
+    ~reduced_sal["landlord_property_id"].isin(stuff_to_remove)
+].copy()
+
+sal_to_add["landlord_property_id"] = sal_to_add["domna_property_id"].copy()
+
+full_sal = pd.concat(
+    [reduced_sal_final, sal_to_add],
+)
+
+# filename = (
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - "
+#     "final asset list.xlsx"
+# )
+# with pd.ExcelWriter(filename) as writer:
+#     full_sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+#     # Store the three sections
+#     reduced_sal_final[0:25000].to_excel(writer, sheet_name="Batch 1 - was correct", index=False)
+#     reduced_sal_final[25000:].to_excel(writer, sheet_name="Batch 2 - was correct", index=False)
+#     sal_to_add.to_excel(writer, sheet_name="Batch 3 - needs adding", index=False)
+
+# We now prepare the process of getting the associated plans
+# We have the properties we need to delete. We can get their associated plans for all scenario IDs
+scenario_ids = [908, 909, 910]
+
+import pandas as pd
+from sqlalchemy.orm import Session
+from backend.app.db.models.portfolio import PropertyModel
+from backend.app.db.connection import db_session, db_read_session
+from sqlalchemy import select, func
+from sqlalchemy.orm import Session
+from backend.app.db.models.recommendations import Plan
+
+uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist()
+
+# PORTFOLIO_ID = 435
+
+# SCENARIO_ID_WITH_PLANS_TO_DELETE = 910
+
+
+# Get the property IDs for these UPRNs
+# def get_property_ids_for_uprns(session: Session, uprns: list[int], portfolio_id) -> list[int]:
+#     return [
+#         property_id
+#         for (property_id,) in
+#         session.query(PropertyModel.id)
+#         .filter(
+#             PropertyModel.uprn.in_(uprns),
+#             PropertyModel.portfolio_id == portfolio_id
+#         )
+#         .all()
+#     ]
+#
+#
+# with db_read_session() as session:
+#     property_ids_to_delete = get_property_ids_for_uprns(
+#         session, uprns_to_be_deleted, portfolio_id=PORTFOLIO_ID
+#     )
+#
+#
+# def count_plans_for_scenario(session: Session, scenario_id: int, portfolio_id, property_ids) -> int:
+#     return session.execute(
+#         select(func.count())
+#         .select_from(Plan)
+#         .where(
+#             Plan.scenario_id == scenario_id,
+#             Plan.portfolio_id == portfolio_id,
+#             Plan.property_id.in_(property_ids)
+#         )
+#     ).scalar_one()
+#
+#
+# with db_session() as session:
+#     n_plans = count_plans_for_scenario(
+#         session,
+#         scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE,
+#         portfolio_id=PORTFOLIO_ID,
+#         property_ids=property_ids_to_delete
+#     )
+#
+#
+# def get_plan_ids_for_scenario(
+#     session: Session, scenario_id: int, portfolio_id, property_ids
+# ) -> list[int]:
+#     result = session.execute(
+#         select(Plan.id, Plan.property_id)
+#         .where(
+#             Plan.scenario_id == scenario_id,
+#             Plan.portfolio_id == portfolio_id,
+#             Plan.property_id.in_(property_ids)
+#         )
+#     )
+#     return [{"plan_id": row.id, "property_id": row.property_id} for row in result]
+#
+#
+# with db_session() as session:
+#     plan_ids_to_property = get_plan_ids_for_scenario(
+#         session,
+#         scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE,
+#         portfolio_id=PORTFOLIO_ID,
+#         property_ids=property_ids_to_delete
+#     )
+#
+# df = pd.DataFrame(plan_ids_to_property)
+# df[df["property_id"].duplicated()].shape
+#
+# plan_ids = [row["plan_id"] for row in plan_ids_to_property]
+#
+#
+# def chunked(iterable, size):
+#     for i in range(0, len(iterable), size):
+#         yield iterable[i:i + size]
+#
+#
+# from sqlalchemy import text
+# from sqlalchemy.orm import Session
+#
+#
+# def delete_plan_batch(session: Session, plan_ids: list[int]):
+#     if not plan_ids:
+#         return
+#
+#     session.execute(text("SET LOCAL lock_timeout = '5s'"))
+#
+#     params = {"plan_ids": plan_ids}
+#
+#     # ----------------------------
+#     # recommendation_materials
+#     # ----------------------------
+#     session.execute(
+#         text("""
+#             DELETE FROM recommendation_materials rm
+#             USING plan_recommendations pr
+#             WHERE rm.recommendation_id = pr.recommendation_id
+#               AND pr.plan_id = ANY(:plan_ids)
+#         """),
+#         params,
+#     )
+#
+#     # ----------------------------
+#     # plan_recommendations
+#     # ----------------------------
+#     session.execute(
+#         text("""
+#             DELETE FROM plan_recommendations
+#             WHERE plan_id = ANY(:plan_ids)
+#         """),
+#         params,
+#     )
+#
+#     # ----------------------------
+#     # recommendations (only those used by these plans)
+#     # ----------------------------
+#     session.execute(
+#         text("""
+#             DELETE FROM recommendation r
+#             WHERE r.id IN (
+#                 SELECT DISTINCT recommendation_id
+#                 FROM plan_recommendations
+#                 WHERE plan_id = ANY(:plan_ids)
+#             )
+#         """),
+#         params,
+#     )
+#
+#     # ----------------------------
+#     # plans LAST
+#     # ----------------------------
+#     session.execute(
+#         text("""
+#             DELETE FROM plan
+#             WHERE id = ANY(:plan_ids)
+#         """),
+#         params,
+#     )
+#
+#
+# batch_size = 25
+# total = (len(plan_ids) + batch_size - 1) // batch_size
+#
+# for i, batch in enumerate(chunked(plan_ids, batch_size), start=1):
+#     print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)")
+#
+#     with db_session() as session:
+#         delete_plan_batch(session, batch)
+#
+#     print(f"Batch {i} committed")
+#
+# # Now, we delete the associated properties in batch and associated objects. It should
+# # largely be property, property details
+# property_ids_to_delete
+#
+# from sqlalchemy import text
+# from sqlalchemy.orm import Session
+#
+#
+# def move_properties_between_portfolios(
+#     session: Session,
+#     property_ids: list[int],
+#     from_portfolio_id: int,
+#     to_portfolio_id: int,
+# ):
+#     if not property_ids:
+#         return 0
+#
+#     result = session.execute(
+#         text("""
+#             UPDATE property
+#             SET portfolio_id = :to_portfolio_id
+#             WHERE portfolio_id = :from_portfolio_id
+#               AND id = ANY(:property_ids)
+#         """),
+#         {
+#             "property_ids": property_ids,
+#             "from_portfolio_id": from_portfolio_id,
+#             "to_portfolio_id": to_portfolio_id,
+#         },
+#     )
+#
+#     return result.rowcount
+#
+#
+# # Moved?
+# # 573476, 586011
+#
+# property_ids_to_delete2 = [x for x in property_ids_to_delete if x not in [573476, 586011]]
+#
+# with db_session() as session:
+#     n_moved = move_properties_between_portfolios(
+#         session,
+#         property_ids=property_ids_to_delete2,
+#         from_portfolio_id=PORTFOLIO_ID,
+#         to_portfolio_id=32,  # Archive portfolio
+#     )
--- a/Project/n_fixing_already_installed_bug.py
+++ b/Project/n_fixing_already_installed_bug.py
@ -0,0 +1,80 @@
+# 1) Need to get all already installed measures
+# 2) get the unique uprns for these properties
+# 3) Create a re-fresh SAL for these properties
+# 4) re-trigger EPC C w/o EWI/IWI + the EPC B scenario
+
+from backend.app.db.models.recommendations import InstalledMeasure
+from backend.app.db.connection import db_session
+from etl.customers.cambridge.surveys import current_epc
+
+# Get all installed measures from the installedMeasure table
+with db_session() as session:
+    # We need installed measures, where the measure type is cavity wall insulation
+    installed_measures = session.query(InstalledMeasure).filter(
+        InstalledMeasure.measure_type.in_(["cavity_wall_insulation"])
+    ).all()
+    # Get the uprns
+    installed_uprns = [x.uprn for x in installed_measures]
+
+installed_uprns = list(set(installed_uprns))
+
+# We then create a portfolio of properties we need to re-run
+import pandas as pd
+
+sal = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - "
+    "final asset list.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+needing_retry = sal[sal["epc_os_uprn"].isin(installed_uprns)]
+
+# Store
+needing_retry.to_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
+    "SAL/properties_needing_retry_20260115 - cavity wall insulation.xlsx",
+    sheet_name="Standardised Asset List",
+    index=False
+)
+
+#### Testing
+with_ewi = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, ashp 3.0 - 20250113 final.xlsx"
+)
+without_ewi = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
+)
+
+comparison = with_ewi.merge(
+    without_ewi,
+    left_on="uprn",
+    right_on="uprn",
+    suffixes=("_with_ewi", "_without_ewi")
+)
+
+with_ewi = comparison[comparison["total_retrofit_cost_with_ewi"] > 0]
+with_ewi["current_epc_rating_with_ewi"].value_counts()
+with_ewi["current_epc_rating_with_ewi"].value_counts()
+
+without_ewi = comparison[comparison["total_retrofit_cost_without_ewi"] > 0]
+with_ewi = comparison[comparison["total_retrofit_cost_with_ewi"] > 0]
+
+with_ewi[with_ewi["current_epc_rating_with_ewi"] == "Epc.C"]["uprn"]
+
+to_fix = with_ewi[with_ewi["current_epc_rating_with_ewi"] == "Epc.C"]
+to_fix = to_fix[["uprn", "address_with_ewi", "postcode_with_ewi", "property_type_with_ewi"]].rename(
+    columns={
+        "address_with_ewi": "address",
+        "postcode_with_ewi": "postcode",
+        "property_type_with_ewi": "property_type"
+    }
+).merge(
+    sal[["epc_os_uprn", "landlord_built_form"]],
+    left_on="uprn",
+    right_on="epc_os_uprn",
+    how="left"
+).drop(columns=["epc_os_uprn"])
+
+to_fix = to_fix.to_dict("records")
--- a/infrastructure/terraform/dev.tfvars
+++ b/infrastructure/terraform/dev.tfvars
@ -9,7 +9,7 @@ api_url_prefix = "api"

 # Database
 allocated_storage = 20
-instance_class = "db.t3.micro"
+instance_class = "db.t4g.medium"
 database_name = "DevAssessmentModelDB"

 # S3
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@ -86,6 +86,18 @@ class Recommendations:

        inclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.inclusions]
        exclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.exclusions]
+
+        # If the property has already-installed measures, we force them into the inclusions so that they get
+        # factored into the baseline. NOTE: we'll likely need to remove this later.
+        if self.property_instance.already_installed:
+            # We make sure that any already installed measures are included
+            for rec in self.property_instance.already_installed:
+                if rec not in inclusions_full:
+                    inclusions_full.append(rec)
+
+            # We remove them from the exclusions if they are there
+            exclusions_full = [e for e in exclusions_full if e not in self.property_instance.already_installed]
+
        # We need to unlist any lists, but we should check if they're lists first
        inclusions_full = [
            item for sublist in inclusions_full for item in (sublist if isinstance(sublist, list) else [sublist])
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@ -39,7 +39,7 @@ class VentilationRecommendations(Definitions):

        parts = self.mechanical_ventilation_materials.copy()

-        already_installed = "cavity_wall_insulation" in self.property.already_installed
+        already_installed = "mechanical_ventilation" in self.property.already_installed

        # TODO: We now have multiple ventilation options - we default to selecting the cheapest option
        part = min(parts, key=lambda x: x['total_cost'])
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@ -202,8 +202,13 @@ def calculate_fixed_gain(property_required_measures, recommendations, p, needs_v
    return fixed_gain


-def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float,
-                   eco_packages: None | dict = None) -> float | None:
+def calculate_gain(
+    body: PlanTriggerRequest,
+    p: Property,
+    fixed_gain: float,
+    eco_packages: None | dict = None,
+    already_installed_gain: float = 0,
+) -> float | None:
    """
    Calculates the target gain value for optimisation based on the goal.

@ -221,6 +226,7 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float,
    fixed_gain : float
        Total fixed gain from required measures (returned by calculate_fixed_gain).
    eco_packages : dict, optional
+    already_installed_gain : float, optional

    Returns
    -------
@ -228,13 +234,17 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float,
        Required SAP gain for EPC, or None for non-EPC goals.
    """
    if body.goal == "Increasing EPC":
-        current_sap = int(p.data["current-energy-efficiency"])
+        current_sap = int(p.data["current-energy-efficiency"]) + already_installed_gain

        target_sap = (
            eco_packages.get(p.id)[1] if eco_packages.get(p.id)[1] is not None
            else epc_to_sap_lower_bound(body.goal_value)
        )

+        if target_sap <= current_sap:
+            # We've already met or exceeded the target EPC
+            return 0
+
        gain = CostOptimiser.calculate_sap_gain_with_slack(
            target_sap - current_sap
        ) - fixed_gain
--- a/recommendations/tests/test_optimiser_functions.py
+++ b/recommendations/tests/test_optimiser_functions.py
@ -85,6 +85,22 @@ class TestCalculateGain:
        gain = optimiser_functions.calculate_gain(body, prop, fixed_gain=0)
        assert gain is None

+    def test_returns_zero_for_already_installed_getting_to_target(self):
+        body = SimpleNamespace(goal="Increasing EPC", goal_value="C")
+        p = SimpleNamespace(data={"current-energy-efficiency": "67"}, id=1)
+        fixed_gain = 0
+        eco_packages = {1: (None, None, None, [])}  # [1] is None -> target falls back to the EPC lower bound
+        already_installed_sap = 2  # added to the current SAP, lifting it from 67 to 69
+        gain = optimiser_functions.calculate_gain(
+            body=body,
+            p=p,
+            fixed_gain=fixed_gain,
+            eco_packages=eco_packages,
+            already_installed_gain=already_installed_sap
+        )
+
+        assert gain == 0  # expects the early return taken when target_sap <= current_sap
+
    def test_calculates_gain_for_epc(self, monkeypatch):
        # patch cost optimiser calculation
        monkeypatch.setattr(optimiser_functions, "epc_to_sap_lower_bound", lambda goal_value: 69)
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@ -14,14 +14,16 @@ from collections import defaultdict

 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 434  # Peabody
+PORTFOLIO_ID = 435  # Peabody
 SCENARIOS = [
-    904,
-    905
+    908,
+    909,
+    910,
 ]
 scenario_names = {
-    904: "EPC C - no solid floor, ashp 3.0",
-    905: "EPC B - no solid floor, ashp 3.0",
+    908: "EPC C - no solid floor, ashp 3.0",
+    909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
+    910: "EPC B - no solid floor, no EWI, ashp 3.0"
 }


@ -231,7 +233,7 @@ for scenario_id in SCENARIOS:

    # Create excel to store to
    filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-                f"Project/Final SAL/{scenario_names[scenario_id]} - corrected.xlsx")
+                f"Project/Final SAL/{scenario_names[scenario_id]} - 20250113 final.xlsx")
    with pd.ExcelWriter(filename) as writer:
        df.to_excel(writer, sheet_name="properties", index=False)