implemented required measures for mod

2026-07-27 23:35:01 +00:00 · 2025-03-19 18:50:21 +00:00 · 2025-03-19 18:50:21 +00:00 · 7111f1a43a
commit 7111f1a43a
parent 292da782a0
13 changed files with 661 additions and 53 deletions
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@ -7,7 +7,7 @@
      <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
    </content>
-    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyNamespacePackagesService">
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -3,7 +3,7 @@
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
  <component name="PyCharmProfessionalAdvertiser">
    <option name="shown" value="true" />
  </component>
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@ -545,7 +545,10 @@ class AssetList:
                raise ValueError("Missing full address - please specify columns to concatenate")
            self.full_address_colname = self.STANDARD_FULL_ADDRESS
            self.standardised_asset_list[self.full_address_colname] = (
-                self.standardised_asset_list[self.full_address_cols_to_concat].apply(lambda x: ", ".join(x), axis=1)
+                self.standardised_asset_list[self.full_address_cols_to_concat].apply(
+                    lambda x: ", ".join([y for y in x if not pd.isnull(y)]),
+                    axis=1
+                )
            )
        else:

--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -88,6 +88,31 @@ def app():
    # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
    # - Or the insulation required is loft/cavity (floors should be solid)

+    # PFP
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East"
+    data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx"
+    sheet_name = "PFP EAST"
+    postcode_column = 'Postcode'
+    fulladdress_column = None
+    address1_column = "AddressLine1"
+    address1_method = None
+    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
+    missing_postcodes_method = None
+    landlord_year_built = None
+    landlord_os_uprn = None
+    landlord_property_type = "Archetype"
+    landlord_built_form = "Archetype"
+    landlord_wall_construction = None
+    landlord_heating_system = None
+    landlord_existing_pv = None
+    landlord_property_id = "Uprn"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+
    # Wates
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - "
    data_filename = "ECO 4 Wates.xlsx"
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@ -185,7 +185,7 @@ class GoogleSolarApi:
            ):
                self.exclude_likely_duplicate_surfaces()

-        # TODO: We need to constrain the roof area, based on the floor area to be more conservative
+        # We constrain the roof area, based on the floor area to be more conservative
        self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
        if self.roof_area > property_instance.roof_area * self.ROOF_AREA_TOLERANCE:
            self.roof_area = property_instance.roof_area
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -704,21 +704,70 @@ async def trigger_plan(body: PlanTriggerRequest):
            if not recommendations.get(p.id):
                continue

-            input_measures = prepare_input_measures(recommendations[p.id], body.goal)
+            # Flatten twice: recommendations[p.id] is a list of lists of recommendations
+            property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs}
+
+            measures_to_optimise = recommendations[p.id]
+            property_required_measures = []
+            if body.required_measures:
+                property_required_measures = [
+                    m for m in measures_to_optimise if m[0]["type"] in body.required_measures
+                ]
+                measures_to_optimise = [
+                    m for m in measures_to_optimise if m[0]["type"] not in body.required_measures
+                ]
+
+            # If we have a wall insulation measure, we MUST include mechanical ventilation.
+            # Additionally, if we have required measures, they must also be included. Their SAP
+            # points are therefore fixed, so we deduct them from the points the optimiser needs to
+            # find to reach the target SAP band (ventilation instead increases the points needed).
+            measures_needing_ventilation = [
+                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
+            ]
+            needs_ventilation = any(x in property_measure_types for x in measures_needing_ventilation)
+
+            input_measures = prepare_input_measures(
+                measures_to_optimise, body.goal, needs_ventilation, measures_needing_ventilation
+            )

            if not input_measures[0]:
                # This means that we have no defaults
                selected_recommendations = {}
            else:

+                fixed_gain = 0
+                if property_required_measures:
+                    # We get the SAP points for the required measures
+                    if body.goal != "Increasing EPC":
+                        raise NotImplementedError("Only EPC optimisation is currently supported")
+                    sap_by_type = [
+                        {"type": rec["type"], "sap_points": rec["sap_points"]} for recs in property_required_measures
+                        for rec in recs
+                    ]
+                    # We get a MAX sap points per type
+                    max_per_type = (
+                        pd.DataFrame(sap_by_type).groupby("type")["sap_points"].max().to_dict()
+                    )
+                    fixed_gain = sum(max_per_type.values())
+
+                    property_required_measure_types = {rec["type"] for rec in sap_by_type}
+
+                    # If the property needs ventilation, but the measures we optimise didn't include
+                    # ventilation, we add the points for ventilation as a fixed gain
+                    if needs_ventilation and any(
+                        r in property_required_measure_types for r in measures_needing_ventilation
+                    ):
+                        fixed_gain += next(
+                            (r[0]["sap_points"] for r in recommendations[p.id] if
+                             r[0]["type"] == "mechanical_ventilation"),
+                            0
+                        )
+
                current_sap_points = int(p.data["current-energy-efficiency"])
-                ventilation_impact = next(
-                    (r[0]["sap_points"] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
-                    0
-                )
+
                sap_gain = CostOptimiser.calculate_sap_gain_with_slack(
                    epc_to_sap_lower_bound(body.goal_value) - current_sap_points
-                ) + abs(ventilation_impact)
+                ) - fixed_gain

                if not body.optimise:
                    if body.goal != "Increasing EPC":
@ -748,6 +797,31 @@ async def trigger_plan(body: PlanTriggerRequest):

                selected_recommendations = {r["id"] for r in solution}

+            if property_required_measures:
+                # For each required measure, add its cheapest option to the selected recommendations
+                for recs in property_required_measures:
+                    # Map each not-yet-selected option's recommendation id to its total cost
+                    cost_to_id = {
+                        rec["recommendation_id"]: rec["total"] for rec in recs
+                        if rec["recommendation_id"] not in selected_recommendations
+                    }
+                    # Take the recommendation id with the lowest cost
+
+                    selected_recommendations.add(min(cost_to_id, key=cost_to_id.get))
+                    # Update the solution with the selected recommendations
+                    solution = []
+                    for recs in recommendations[p.id]:
+                        for rec in recs:
+                            if rec["recommendation_id"] in selected_recommendations:
+                                solution.append(
+                                    {
+                                        "id": rec["recommendation_id"],
+                                        "cost": rec["total"],
+                                        "gain": rec["sap_points"],
+                                        "type": rec["type"]
+                                    }
+                                )
+
            # If wall insulation is selected, we also include mechanical ventilation as a best practice measure
            if any(x in [r["type"] for r in solution] for x in [
                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@ -75,6 +75,8 @@ class PlanTriggerRequest(BaseModel):
    valuation_file_path: Optional[str] = None
    exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
    inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
+    # This is a list of measures that we want to be included, if they are options
+    required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)

    scenario_name: Optional[str] = ""
    multi_plan: Optional[bool] = False
--- a/etl/customers/mod/pilot/1.
+++ b/etl/customers/mod/pilot/1.
@ -104,13 +104,15 @@ def app():
            }
        )

+    # also include the floor area
    asset_list = df[
-        ["uprn", "address1", "postcode", "NUMBER_OF_BEDROOMS", "BLDNG_STOREYS_QTY", ]
+        ["uprn", "address1", "postcode", "NUMBER_OF_BEDROOMS", "BLDNG_STOREYS_QTY", "BLDNG_MSRMNT_VAL"]
    ].rename(
        columns={
            "address1": "address",
            "NUMBER_OF_BEDROOMS": "n_bedrooms",
-            "BLDNG_STOREYS_QTY": "number_of_floors"
+            "BLDNG_STOREYS_QTY": "number_of_floors",
+            "BLDNG_MSRMNT_VAL": "floor_area"
        }
    )

--- a/etl/customers/mod/pilot/2.
+++ b/etl/customers/mod/pilot/2.
@ -0,0 +1,398 @@
+from pprint import pprint
+import pandas as pd
+import numpy as np
+from backend.app.utils import sap_to_epc
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+
+
+def get_data(portfolio_id, scenario_ids):
+    session = sessionmaker(bind=db_engine)()
+    session.begin()
+
+    # Get properties and their details for a specific portfolio
+    properties_query = session.query(
+        PropertyModel,
+        PropertyDetailsEpcModel
+    ).join(
+        PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
+    ).filter(
+        PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
+    ).all()
+
+    # Transform properties data to include all fields dynamically
+    properties_data = [
+        {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
+         **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
+            PropertyDetailsEpcModel.__table__.columns}}
+        for prop in properties_query
+    ]
+
+    # Get property IDs from fetched properties
+
+    # Get plans linked to the fetched properties
+    plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+
+    # Transform plans data to include all fields dynamically
+    plans_data = [
+        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        for plan in plans_query
+    ]
+
+    # Extract plan IDs for filtering recommendations through PlanRecommendations
+    plan_ids = [plan['id'] for plan in plans_data]
+
+    # Get recommendations through PlanRecommendations for those plans and that are default
+    recommendations_query = session.query(
+        Recommendation,
+        Plan.scenario_id
+    ).join(
+        PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
+    ).join(
+        Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
+    ).filter(
+        PlanRecommendations.plan_id.in_(plan_ids),
+        Recommendation.default == True  # Filtering for default recommendations
+    ).all()
+
+    # Transform recommendations data to include all fields dynamically and include scenario_id
+    recommendations_data = [
+        {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation')
+        else getattr(rec, col.name) for
+            col in Recommendation.__table__.columns},
+         "Scenario ID": rec.scenario_id}
+        for rec in recommendations_query
+    ]
+
+    session.close()
+
+    return properties_data, plans_data, recommendations_data
+
+
+def app():
+    """
+    Given a portfolio and a scenario, this function prepares an excel model to present the data
+    """
+
+    # Set the inputs:
+    portfolio_id = 139
+    scenario_ids = [233, 234]
+
+    properties_data, plans_data, recommendations_data = get_data(
+        portfolio_id=portfolio_id, scenario_ids=scenario_ids
+    )
+
+    properties_df = pd.DataFrame(properties_data)
+    plans_df = pd.DataFrame(plans_data)
+    recommendations_df = pd.DataFrame(recommendations_data)
+
+    # Merge on the original data
+    mod_property_data = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD property data.csv"
+    )
+
+    property_asset_data = properties_df.merge(
+        mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn"
+    )
+
+    property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False)
+    property_asset_data["pre_2002"] = property_asset_data["BUILD_YEAR"] < 2002
+    property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip()
+    property_asset_data["is_insulated"] = (
+        property_asset_data["walls"].str.split(",").str[1].str.strip().isin(
+            ["filled cavity", "with external insulation", "filled cavity and external insulation"]
+        ) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"])
+    )
+    property_asset_data["is_insulated"] = np.where(
+        property_asset_data["is_insulated"], "Insulated", "Uninsulated"
+    )
+    property_asset_data["is_pitched"] = np.where(
+        property_asset_data["is_pitched"], "Pitched roof", "Not Pitched Roof"
+    )
+    property_asset_data["pre_2002"] = np.where(
+        property_asset_data["pre_2002"], "Pre 2002", "Post 2002"
+    )
+
+    archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_2002"]
+
+    assigned_archetypes = (
+        property_asset_data.groupby(
+            archetype_variables
+        ).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False)
+    )
+
+    # Make the archetype ID a concatenation of the variables
+    assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply(
+        lambda x: "_".join(x.astype(str)), axis=1
+    )
+
+    # Most prominent archetypes
+    prominent_archetypes = assigned_archetypes.head(3)
+    other_archetypes = assigned_archetypes.tail(-3)
+    # 2 or fewer properties in the other archetypes
+
+    property_asset_data = property_asset_data.merge(
+        assigned_archetypes[archetype_variables + ["archetype_id"]],
+        how="left",
+        on=archetype_variables
+    )
+
+    # Create age bands:
+    # 1960-1969
+    # 1970-1979
+    # 1980-1989
+    # 1990-1999
+    # 2000+
+    property_asset_data["age_band"] = pd.cut(
+        property_asset_data["BUILD_YEAR"],
+        bins=[1959, 1969, 1979, 1989, 1999, 2022],
+        labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"]
+    )
+
+    # Create floor area bands
+    # 0-73
+    # 74-97
+    # 98-199
+    # 200+
+    property_asset_data["floor_area_band"] = pd.cut(
+        property_asset_data["total_floor_area"],
+        bins=[0, 73, 97, 199, 10000],
+        labels=["0-73", "74-97", "98-199", "200+"]
+    )
+
+    property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy()
+    property_asset_data["archetype_group"] = np.where(
+        property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values),
+        "other",
+        property_asset_data["archetype_group"]
+    )
+
+    # For colour
+    wall_types = (
+        property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename(
+            columns={"wall_type": "Wall Type"}
+        )
+    )
+    # Group into age bands
+    ages = (
+        property_asset_data[["age_band"]].value_counts()
+        .to_frame()
+        .reset_index().sort_values("age_band", ascending=True)
+        .rename(columns={"age_band": "Age Band"})
+    )
+    floor_area_bands = (
+        property_asset_data[["floor_area_band"]].value_counts()
+        .to_frame()
+        .reset_index().sort_values("floor_area_band", ascending=True)
+        .rename(columns={"floor_area_band": "Floor Area Band"})
+    )
+    archetype_counts = (
+        property_asset_data[["archetype_group"]].
+        value_counts().
+        to_frame().
+        reset_index()
+        .rename(columns={"archetype_group": "Archetype"})
+    )
+
+    # epc breakdown
+    epc_breakdown = (
+        property_asset_data["current_epc_rating"]
+        .apply(lambda x: x.value)
+        .value_counts()
+        .to_frame()
+        .reset_index()
+    )
+
+    # Figures for the deck
+    # Carbon per property
+    totals = property_asset_data[
+        [
+            "Total_household_members",
+            "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
+            "heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
+            "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
+        ]
+    ].copy()
+    totals["total_cost"] = (
+        totals["heating_cost_current"] +
+        totals["hot_water_cost_current"] +
+        totals["lighting_cost_current"] +
+        totals["appliances_cost_current"] +
+        totals["gas_standing_charge"] +
+        totals["electricity_standing_charge"]
+    )
+    print(
+        totals[
+            [
+                "Total_household_members",
+                "co2_emissions",
+                "current_energy_demand",
+                "total_cost",
+            ]
+        ].mean()
+    )
+
+    # Store these in an Excel workbook
+    # with pd.ExcelWriter(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD archetype breakdowns.xlsx"
+    # ) as writer:
+    #     wall_types.to_excel(writer, sheet_name="Wall Types", index=False)
+    #     ages.to_excel(writer, sheet_name="Ages", index=False)
+    #     floor_area_bands.to_excel(writer, sheet_name="Floor Area Bands", index=False)
+    #     archetype_counts.to_excel(writer, sheet_name="Archetype Counts", index=False)
+    #     epc_breakdown.to_excel(writer, sheet_name="EPC Rating", index=False)
+
+    contingency = 0.26
+
+    # We prepare the outputs, by scenario
+    scenario_data = {}
+    for scenario in scenario_ids:
+
+        scenario_recommendations_df = recommendations_df[
+            recommendations_df["Scenario ID"] == scenario
+            ].copy()
+
+        scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"]
+        scenario_recommendations_df["total_cost"] = (
+            scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"]
+        )
+
+        recommended_measures_df = scenario_recommendations_df[
+            ["property_id", "measure_type", "estimated_cost", "default"]
+        ]
+
+        recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
+        recommended_measures_df = recommended_measures_df.drop(columns=["default"])
+
+        # Metrics by property ID
+        aggregated_metrics = scenario_recommendations_df[
+            [
+                "property_id", "type", "default", "sap_points",
+                "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency",
+                "total_cost"
+            ]
+        ]
+        aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
+        aggregated_metrics = aggregated_metrics.groupby("property_id")[
+            ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
+             "total_cost", "contingency"]
+        ].sum().reset_index()
+
+        recommendations_measures_pivot = recommended_measures_df.pivot(
+            index='property_id',
+            columns='measure_type',
+            values='estimated_cost'
+        )
+        recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
+        recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
+
+        # We flag with boolean if the measure is recommended
+        for c in recommendations_measures_pivot.columns:
+            if c == "property_id":
+                continue
+            recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0
+
+        # We now create a final output
+        df = properties_df[
+            [
+                "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
+                "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
+            ]
+        ].merge(
+            recommendations_measures_pivot, how="left", on="property_id"
+        ).merge(
+            aggregated_metrics, how="left", on="property_id"
+        )
+
+        df = df.drop(columns=["property_id"])
+        for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
+            df[c] = df[c].fillna(0)
+
+        df = df.rename(
+            columns={
+                "uprn": "UPRN",
+                "address": "Address",
+                "postcode": "Postcode",
+                "walls": "Walls",
+                "roof": "Roof",
+                "heating": "Heating",
+                "windows": "Windows",
+                "current_epc_rating": "Current EPC Rating",
+                "current_sap_points": "Current SAP Points",
+                "total_floor_area": "Total Floor Area",
+                "number_of_rooms": "Number of Habitable Rooms",
+                "floor_height": "Floor Height",
+            }
+        )
+
+        # Calculate post SAP
+        df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
+        df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
+        df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
+
+        # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler,
+        # the bills go up when recommending HHRSH, so it doesn't make it to EPC B
+        # For mid-terrace units, use the ordnance survey API to check if there is space for a heat pump?
+        # DO it manually???
+
+        # Doesn't make it
+        # misses = df[df["Predicted Post Works EPC"] == "C"]
+        # # 5 of them are flats and so are difficult to get to EPC B without renewables. Possibly not worth it from an
+        # # ROI perspective
+        #
+        # misses[["UPRN", "Address", "Postcode", "property_type"]]
+
+        #              UPRN                               Address  Postcode property_type
+        # 2    100120988937              13 Sidbury Circular Road   SP9 7HX          Flat  No further action
+        # 3    100120988998              74 Sidbury Circular Road   SP9 7JA          Flat  No further action
+        # 4    100120989416                       47 Zouch Avenue   SP9 7LR          Flat  No further action
+        # 6    100060585002  42, Muscott Close, Shipton Bellinger   SP9 7TX         House  Can probably take a heat pump
+        # 37    10000801072        34 Luffenham Place, Chicksands  SG17 5XH         House  Already surveyed as having
+        # an ASHP - should be looked at
+        # 121  100120988259                      8, Karachi Close   SP9 7LW          Flat
+        # 122  100121101217                     599, Pepper Place  BA12 0DW          Flat
+        # 140  100021455241         33 Blenheim Crescent, Ruislip   HA4 7HA         House - Solar isnt recommended
+        # due to bug
+        # 149  100120915656            10 Bower Green, Shrivenham   SN6 8TU         House - Solar isn't recommended
+        # due to bug
+
+        scenario_data[scenario] = df
+
+    measure_counts = {}
+    for scenario in scenario_ids:
+        recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
+        measure_counts[scenario] = scenario_data[scenario][recommendation_cols].sum().to_dict()
+
+    pprint(measure_counts[scenario_ids[0]])
+    pprint(measure_counts[scenario_ids[1]])
+
+    df = scenario_data[scenario_ids[1]]
+    z = df[
+        (df["Walls"] == "Cavity wall, as built, no insulation") & (~df["Recommendation: cavity_wall_insulation"])
+        ]
+
+    # Scenario adjustments:
+    # Exclude: boiler_upgrade
+    # Make ASHP COP 3.5
+
+    # Metrics we need by scenario:
+    # Cost
+    # contingency
+    # Carbon
+    # kwh
+    # bill savings
+    scenario_metrics = {}
+    for scenario in scenario_ids:
+        df = scenario_data[scenario].copy()
+        df["cost_per_sap_point"] = df["total_cost"] / df["sap_points"]
+        df["cost_per_carbon"] = df["total_cost"] / df["co2_equivalent_savings"]
+        avg_savings = df[
+            ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
+             "cost_per_sap_point", "cost_per_carbon", "total_cost", "contingency"]
+        ].mean().to_dict()
+
+    # TODO: Add a slide on valuation improvement, on a sample of properties?
+
+    # TODO: Read in costing data and breakdown
--- a/etl/customers/united
+++ b/etl/customers/united
@ -0,0 +1,73 @@
+import os
+import pandas as pd
+import numpy as np
+from asset_list.utils import get_data
+from backend.SearchEpc import SearchEpc
+from etl.spatial.OpenUprnClient import OpenUprnClient
+
+from dotenv import load_dotenv
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/Potential GMCA props 05.03.xlsx"
+
+    df = pd.read_excel(filepath)
+    df["row_id"] = df.index
+
+    df["house_number"] = df.apply(
+        lambda x: SearchEpc.get_house_number(x["Address"], x["Postcode"]),
+        axis=1
+    )
+
+    properties_data, _, _ = get_data(
+        df=df,
+        manual_uprn_map={},
+        epc_auth_token=EPC_AUTH_TOKEN,
+        uprn_column=None,
+        fulladdress_column="Address",
+        address1_column="house_number",
+        postcode_column="Postcode",
+        property_type_column=None,
+        built_form_column=None,
+        epc_api_only=True,
+        row_id_name="row_id",
+    )
+
+    no_data = df[df["row_id"].isin(_)]
+    no_data[["Address", "Postcode"]]
+
+    # 53  108 Alexandra Street  OL6 9QP 100011536830
+    # 56    301 Whiteacre Road  OL6 9QF 100011557437
+    # 65    97 Princess Street  OL6 9QJ 100011551813
+
+    data = df.merge(
+        pd.DataFrame(properties_data)[["uprn", "row_id"]],
+        how="left", left_on="row_id", right_on="row_id"
+    )
+
+    # Fill missing UPRNs
+    data["uprn"] = np.where(data["Address"] == "108 Alexandra Street", 100011536830, data["uprn"])
+    data["uprn"] = np.where(data["Address"] == "301 Whiteacre Road", 100011557437, data["uprn"])
+    data["uprn"] = np.where(data["Address"] == "97 Princess Street", 100011551813, data["uprn"])
+
+    # We now get whether the property is listed, heritage or in a conservation area
+    spatial_data = OpenUprnClient.get_spatial_data(uprns=data["uprn"].tolist(), bucket_name="retrofit-data-dev")
+    spatial_data = spatial_data.rename(columns={"UPRN": "uprn"})
+
+    data["uprn"] = data["uprn"].astype(int)
+
+    merged = data.merge(
+        spatial_data, how="left", on="uprn"
+    )
+    # fill NAs
+    for c in ['conservation_status', 'is_listed_building', 'is_heritage_building']:
+        merged[c] = merged[c].fillna(False)
+
+    merged.to_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/Potential GMCA props 05.03 - data "
+        "pulled.xlsx",
+        index=False
+    )
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@ -37,22 +37,25 @@ MCS_SOLAR_PV_COST_DATA = {
    "average_cost_per_kwh-Northern Ireland": 1347,
 }

+# Installers are now working with 435 watt panels
+PANEL_SIZE = 0.435
+
 INSTALLER_SOLAR_COSTS = [
-    {'n_panels': 4, 'array_kwp': 1.6, 'cost': 3040.00, 'installer': 'CEG'},
-    {'n_panels': 5, 'array_kwp': 2.1, 'cost': 3201.00, 'installer': 'CEG'},
-    {'n_panels': 6, 'array_kwp': 2.5, 'cost': 3363.00, 'installer': 'CEG'},
-    {'n_panels': 7, 'array_kwp': 2.9, 'cost': 3524.00, 'installer': 'CEG'},
-    {'n_panels': 8, 'array_kwp': 3.3, 'cost': 3686.00, 'installer': 'CEG'},
-    {'n_panels': 9, 'array_kwp': 3.7, 'cost': 3847.00, 'installer': 'CEG'},
-    {'n_panels': 10, 'array_kwp': 4.1, 'cost': 4009.00, 'installer': 'CEG'},
-    {'n_panels': 11, 'array_kwp': 4.5, 'cost': 4170.00, 'installer': 'CEG'},
-    {'n_panels': 12, 'array_kwp': 4.9, 'cost': 4332.00, 'installer': 'CEG'},
-    {'n_panels': 13, 'array_kwp': 5.3, 'cost': 4835.00, 'installer': 'CEG'},
-    {'n_panels': 14, 'array_kwp': 5.7, 'cost': 5015.00, 'installer': 'CEG'},
-    {'n_panels': 15, 'array_kwp': 6.2, 'cost': 5176.00, 'installer': 'CEG'},
-    {'n_panels': 16, 'array_kwp': 6.6, 'cost': 5338.00, 'installer': 'CEG'},
-    {'n_panels': 17, 'array_kwp': 7.0, 'cost': 5500.00, 'installer': 'CEG'},
-    {'n_panels': 18, 'array_kwp': 7.4, 'cost': 6021.00, 'installer': 'CEG'}
+    {'n_panels': 4, 'array_kwp': 4 * PANEL_SIZE, 'cost': 4089.25, 'installer': 'CEG'},
+    {'n_panels': 5, 'array_kwp': 5 * PANEL_SIZE, 'cost': 4242.48, 'installer': 'CEG'},
+    {'n_panels': 6, 'array_kwp': 6 * PANEL_SIZE, 'cost': 4395.71, 'installer': 'CEG'},
+    {'n_panels': 7, 'array_kwp': 7 * PANEL_SIZE, 'cost': 4548.94, 'installer': 'CEG'},
+    {'n_panels': 8, 'array_kwp': 8 * PANEL_SIZE, 'cost': 4702.17, 'installer': 'CEG'},
+    {'n_panels': 9, 'array_kwp': 9 * PANEL_SIZE, 'cost': 4855.41, 'installer': 'CEG'},
+    {'n_panels': 10, 'array_kwp': 10 * PANEL_SIZE, 'cost': 5010.95, 'installer': 'CEG'},
+    {'n_panels': 11, 'array_kwp': 11 * PANEL_SIZE, 'cost': 5166.49, 'installer': 'CEG'},
+    {'n_panels': 12, 'array_kwp': 12 * PANEL_SIZE, 'cost': 5322.04, 'installer': 'CEG'},
+    {'n_panels': 13, 'array_kwp': 13 * PANEL_SIZE, 'cost': 5657.6, 'installer': 'CEG'},
+    {'n_panels': 14, 'array_kwp': 14 * PANEL_SIZE, 'cost': 5993.16, 'installer': 'CEG'},
+    {'n_panels': 15, 'array_kwp': 15 * PANEL_SIZE, 'cost': 6328.71, 'installer': 'CEG'},
+    {'n_panels': 16, 'array_kwp': 16 * PANEL_SIZE, 'cost': 6483.33, 'installer': 'CEG'},
+    {'n_panels': 17, 'array_kwp': 17 * PANEL_SIZE, 'cost': 6637.95, 'installer': 'CEG'},
+    {'n_panels': 18, 'array_kwp': 18 * PANEL_SIZE, 'cost': 6792.57, 'installer': 'CEG'}
 ]
 # This is the maximum number of panels that we have a cost from the installers for
 INSTALLER_MAX_PANELS = 18
@ -62,11 +65,11 @@ INSTALLER_MAX_PANELS = 18
 INSTALLER_SOLAR_PV_INVERTER_COST = 7500
 INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST = 500  # Just a rough guess to labour costs

-INSTALLER_SCAFFOLDING_COSTS = [
-    {'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'},
-    {'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'},
-    {'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'}
-]
+# INSTALLER_SCAFFOLDING_COSTS = [
+#     {'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'},
+#     {'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'},
+#     {'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'}
+# ]

 # This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average,
 # to be conservative
@ -772,18 +775,14 @@ class Costs:
            battery_cost = [c for c in INSTALLER_SOLAR_BATTERY_COSTS if c["capacity_kwh"] == battery_kwh][0]["cost"]
            subtotal += battery_cost

-        scaffolding_cost = [c for c in INSTALLER_SCAFFOLDING_COSTS if c["stories"] == n_floors][0]["cost"]
-        subtotal += scaffolding_cost
-
        if needs_inverter:
            subtotal += INSTALLER_SOLAR_PV_INVERTER_COST
            # We also add an additional labour cost
            subtotal += INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST

-        # We add an additional cost for scaffolding
-        # The costs from installers exclude VAT
-        vat = subtotal * cls.VAT_RATE
-        total_cost = subtotal + vat
+        # Solar doesn't have VAT but we add a high risk contingency
+        # to account for design variation that we see in practice
+        total_cost = subtotal * (1 + cls.HIGH_RISK_CONTINGENCY)

        # Labour hours are based on estimates from online research but an average team seems to consist of 3 people
        # and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 48 hours of
@ -791,7 +790,7 @@ class Costs:
        return {
            "total": total_cost,
            "subtotal": subtotal,
-            "vat": vat,
+            "vat": 0,
            "labour_hours": 48,
            "labour_days": 2,
        }
@ -1161,7 +1160,6 @@ class Costs:
        pump. This cost will include the boiler upgrade scheme grant

        """
-
        # This is the average cost of a project, we'll add some additional contingency

        if ashp_size is None:
@ -1170,7 +1168,7 @@ class Costs:
            cost = [x for x in INSTALLER_ASHP_COSTS if x][0]["cost"]

        # We add some contingency since there are additional costs such as resizing radiators, that could be required
-        subtotal = cost * (1 + self.CONTINGENCY)
+        subtotal = cost * (1 + self.HIGH_RISK_CONTINGENCY)
        # The costs from installers exclude VAT
        vat = subtotal * self.VAT_RATE
        total_cost = subtotal + vat
@ -1180,7 +1178,7 @@ class Costs:
        labour_hours = labour_days * 8

        return {
-            "total": subtotal,
+            "total": total_cost,
            "subtotal": subtotal,
            "vat": vat,
            "labour_hours": labour_hours,
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@ -145,7 +145,9 @@ class FloorRecommendations(Definitions):
            )
            return

-        raise NotImplementedError("Implement me!")
+        # In this case, we have no recommendation to make. E.g., if we have a solid floor property
+        # but solid floor insulation has been excluded as a measure, we get here
+        return

    @staticmethod
    def _make_floor_description(material):
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@ -1,10 +1,12 @@
-def prepare_input_measures(property_recommendations, goal):
+def prepare_input_measures(property_recommendations, goal, needs_ventilation, measures_needing_ventilation):
    """
    Basic function to convert recommendations_to_upload to a format that is
    suitable for the optimiser - large
    :param property_recommendations:   object containing the recommendations, created in the plan trigger api
    :param goal:    goal to be optimised for, should be one of the keys in gain_map. E.g. if the gain is SAP points,
                    the goal should reflect that desired gain
+    :param needs_ventilation:  boolean to indicate if the property needs ventilation
+    :param measures_needing_ventilation:    list of measures that need ventilation
    :return:    Nested list of input measures
    """

@ -16,9 +18,20 @@ def prepare_input_measures(property_recommendations, goal):
    if not goal_key:
        raise NotImplementedError("Not implemented this gain type - investigate me")

+    # We only ever have one ventilation measure for now
+    ventilation_recommendation = next(
+        (measure[0] for measure in property_recommendations if measure[0]["type"] == "mechanical_ventilation"),
+        {}
+    )
+
    input_measures = []
    for recs in property_recommendations:

+        if needs_ventilation and recs[0]["type"] == "mechanical_ventilation":
+            # If the house needs ventilation, ventilation will be packaged with the fabric measure so
+            # we don't need to optimise it independently
+            continue
+
        if recs[0]["type"] == "solar_pv":
            # if the recommendation is a solar recommendation with a battery, we exclude it from the optimisation.
            recs = [r for r in recs if ~r["has_battery"]]
@ -27,16 +40,34 @@ def prepare_input_measures(property_recommendations, goal):
        if not recs_to_append:
            continue

-        input_measures.append(
-            [
+        to_append = []
+        for rec in recs:
+            # We bundle the impact of ventilation with the measure
+            total = (
+                rec["total"] + ventilation_recommendation["total"] if rec["type"] in measures_needing_ventilation
+                else rec["total"]
+            )
+            gain = (
+                rec[goal_key] + ventilation_recommendation[goal_key] if rec["type"] in measures_needing_ventilation
+                else rec[goal_key]
+            )
+
+            rec_type = (
+                "+".join(
+                    [rec["type"], ventilation_recommendation["type"]]
+                ) if rec["type"] in measures_needing_ventilation
+                else rec["type"]
+            )
+
+            to_append.append(
                {
                    "id": rec["recommendation_id"],
-                    "cost": rec["total"],
-                    "gain": rec[goal_key],
-                    "type": rec["type"]
+                    "cost": total,
+                    "gain": gain,
+                    "type": rec_type
                }
-                for rec in recs if rec["energy_cost_savings"] >= 0
-            ]
-        )
+            )
+
+        input_measures.append(to_append)

    return input_measures