Merge pull request #336 from Hestia-Homes/main

Dev deployment
2026-06-08 11:17:27 +00:00 · 2024-09-09 12:44:58 +01:00 · 2024-09-09 12:44:58 +01:00 · 85951e62fb
commit 85951e62fb
parent 6e714127c6 8e2dec0d68
66 changed files with 11044 additions and 1773 deletions
--- a/backend/Property.py
+++ b/backend/Property.py
@ -2,13 +2,13 @@ import os
 import ast
 from itertools import groupby
 import pandas as pd
+import numpy as np
 from datetime import datetime, timedelta

 from etl.epc.Dataset import TrainingDataset
 from etl.epc.Record import EPCRecord
 from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES
 from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
-from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet
 from etl.epc.settings import DATA_ANOMALY_MATCHES
@ -17,10 +17,11 @@ from recommendations.recommendation_utils import (
    estimate_perimeter,
    get_wall_type,
    estimate_external_wall_area,
-    esimtate_pitched_roof_area,
    estimate_windows,
 )
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+from backend.app.utils import sap_to_epc
+import backend.app.assumptions as assumptions

 ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
 DATA_BUCKET = os.environ.get(
@ -76,18 +77,22 @@ class Property:
        already_installed=None,
        non_invasive_recommendations=None,
        measures=None,
+        energy_assessment=None,
+        is_new=True,
        **kwargs
    ):

        self.epc_record = epc_record

        self.id = id
+        self.is_new = is_new

        self.address = address
        self.postcode = postcode
        self.data = {
            k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()
        }
+
        self.old_data = epc_record.get("old_data")
        self.property_dimensions = None
        # This is a list of measures that have already been installed in the property, typically found as a result
@ -158,17 +163,18 @@ class Property:
        self.floor_height = epc_record.prepared_epc.get("floor_height")
        self.insulation_wall_area = None
        self.floor_area = epc_record.prepared_epc.get("total_floor_area")
-        self.pitched_roof_area = None
+        self.roof_area = None
        self.insulation_floor_area = None
        self.number_lighting_outlets = epc_record.prepared_epc.get(
            "fixed_lighting_outlets_count"
        )
        self.floor_level = None
        self.number_of_windows = None
+        self.windows_area = None
        self.solar_pv_percentage = None

-        self.current_adjusted_energy = None
-        self.expected_adjusted_energy = None
+        self.current_energy_consumption = None
+        self.current_energy_consumption_heating_hotwater = None
        self.current_energy_bill = None
        self.expected_energy_bill = None

@ -177,7 +183,14 @@ class Property:

        self.recommendations_scoring_data = []
        self.simulation_epcs = {}
+        self.updated_simulation_epcs = []

+        # This additional condition data should change how we pass kwargs to this. We should no longer need to pass
+        # kwargs to this class, but instead, we should pass the energy assessment condition data
+        self.energy_assessment_condition_data = energy_assessment["condition"]
+        self.energy_assessment_is_newer = energy_assessment["energy_assessment_is_newer"]
+
+        # TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data
        self.parse_kwargs(kwargs)

    @classmethod
@ -188,6 +201,10 @@ class Property:
        :param kwargs:
        :return:
        """
+
+        # Note - none of this data is contained in an energy assessment, but we should consider how this is done
+        # as we collect more data from the energy assessment
+
        n_bathrooms = kwargs.get("n_bathrooms", None)
        if n_bathrooms not in [None, ""]:
            # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
@ -197,17 +214,32 @@ class Property:
        if n_bedrooms not in [None, ""]:
            n_bedrooms = int(round(float(n_bedrooms) + 1e-5))

+        number_of_floors = kwargs.get("number_of_floors", None)
+        if number_of_floors not in [None, ""]:
+            number_of_floors = int(round(float(number_of_floors) + 1e-5))
+
+        insulation_floor_area = kwargs.get("insulation_floor_area", None)
+        if insulation_floor_area not in [None, ""]:
+            insulation_floor_area = float(insulation_floor_area)
+
+        insulation_wall_area = kwargs.get("insulation_wall_area", None)
+        if insulation_wall_area not in [None, ""]:
+            insulation_wall_area = float(insulation_wall_area)
+
        return {
            "n_bathrooms": n_bathrooms,
            "n_bedrooms": n_bedrooms,
+            "number_of_floors": number_of_floors,
+            "insulation_floor_area": insulation_floor_area,
+            "insulation_wall_area": insulation_wall_area,
            "building_id": kwargs.get("building_id", None),
        }

    def parse_kwargs(self, kwargs):
        # We extract the elements from kwargs that we recognise. Anything additional is ignored
-        self.n_bathrooms = kwargs.get("n_bathrooms", None)
-        self.n_bedrooms = kwargs.get("n_bedrooms", None)
-        self.building_id = kwargs.get("building_id", None)
+        for arg, val in kwargs.items():
+            if val is not None:
+                setattr(self, arg, val)

    def create_base_difference_epc_record(self, cleaned_lookup: dict):
        """
@ -333,7 +365,7 @@ class Property:
            for rec in property_recommendations_by_phase:
                # We simulate the impact of the recommendation at this current phase, and all of the prior phases

-                if rec["type"] == "mechanical_ventilation":
+                if rec["type"] in ["mechanical_ventilation", "trickle_vents", "draught_proofing"]:
                    continue

                scoring_dict = self.create_recommendation_scoring_data(
@ -345,65 +377,88 @@ class Property:
                )
                self.recommendations_scoring_data.append(scoring_dict)

-            # We also use the representative recommendations to produce transformed EPCs
-            represenative_recs_to_this_phase = [
-                r for r in property_representative_recommendations
-                if r["phase"] <= phase
-            ]
+                simulation_epc = self.epc_record.prepared_epc.copy()
+                # Insert static values
+                simulation_epc["lodgement_date"] = simulation_lodgment_date
+                simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}

-            # TODO: This is placeholder, but it's to handle the case of having both internal and external wall
-            #       insulation as options. This will cause the process below to fall over, so we take just
-            #       external wall insulation in epc_transformations, if we have both
-            types = [
-                x["type"] for x in represenative_recs_to_this_phase
-            ]
-            if "external_wall_insulation" in types and "internal_wall_insulation" in types:
+                types = [x["type"] for x in previous_phase_representatives]
+                if "external_wall_insulation" in types and "internal_wall_insulation" in types:
+                    raise Exception("We shouldn't have this in the representative recommendations")
+                # We include previous phases + the recommendation itself in the EPC transformations
                epc_transformations = [
-                    x["description_simulation"] for x in represenative_recs_to_this_phase if
-                    x["type"] != "internal_wall_insulation"
+                    x["description_simulation"] for x in previous_phase_representatives + [rec]
                ]
-            else:
-                epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase]

-            # It is possible that we could have two simulations applied to the same descriptions
-            # We extract these out
-            phase_epc_transformation = {}
-            for config in epc_transformations:
-                for k, v in config.items():
-                    if k in phase_epc_transformation:
-                        if "-energy-eff" in k:
-                            # We take the highest value
-                            if phase_epc_transformation[k] == "Very Good":
+                # It is possible that we could have two simulations applied to the same descriptions
+                # We extract these out
+                phase_epc_transformation = {}
+                for config in epc_transformations:
+                    for k, v in config.items():
+                        if k in phase_epc_transformation:
+                            if "-energy-eff" in k:
+                                # We take the highest value
+                                if phase_epc_transformation[k] == "Very Good":
+                                    continue
+                                elif phase_epc_transformation[k] == "Good":
+                                    if v == "Very Good":
+                                        phase_epc_transformation[k] = v
+                                elif phase_epc_transformation[k] == "Average":
+                                    if v in ["Good", "Very Good"]:
+                                        phase_epc_transformation[k] = v
+                                elif phase_epc_transformation[k] == "Poor":
+                                    if v in ["Average", "Good", "Very Good"]:
+                                        phase_epc_transformation[k] = v
+                                else:
+                                    phase_epc_transformation[k] = v
+
                                continue
-                            elif phase_epc_transformation[k] == "Good":
-                                if v == "Very Good":
-                                    phase_epc_transformation[k] = v
-                            elif phase_epc_transformation[k] == "Average":
-                                if v in ["Good", "Very Good"]:
-                                    phase_epc_transformation[k] = v
-                            elif phase_epc_transformation[k] == "Poor":
-                                if v in ["Average", "Good", "Very Good"]:
-                                    phase_epc_transformation[k] = v
-                            else:
-                                phase_epc_transformation[k] = v

-                            continue
+                            if phase_epc_transformation[k] == v:
+                                continue

-                        if phase_epc_transformation[k] == v:
-                            continue
+                            raise NotImplementedError(
+                                "Already have this key in the phase_epc_transformation - implement me"
+                            )
+                        phase_epc_transformation[k] = v
+                simulation_epc.update(phase_epc_transformation)
+                self.simulation_epcs[rec["recommendation_id"]] = simulation_epc

-                        raise NotImplementedError(
-                            "Already have this key in the phase_epc_transformation - implement me")
-                    phase_epc_transformation[k] = v
+    def update_simulation_epcs(self, impact_summary):
+        """
+        This method will insert the high level measures, such as SAP, heat demand, carbon, etc
+        :return:
+        """
+        if self.simulation_epcs is None:
+            raise ValueError("Simulation EPCs have not been created")

-            simulation_epc = self.epc_record.prepared_epc.copy()
-            # Insert static values
-            simulation_epc["lodgement_date"] = simulation_lodgment_date
+        rec_ids = sorted(list(self.simulation_epcs.keys()))
+        updated_simulation_epcs = []
+        for rec_id in rec_ids:
+            sim_epc = self.simulation_epcs[rec_id].copy()
+            rec_impact = [x for x in impact_summary if x["recommendation_id"] == rec_id][0]
+            # We update all of the features that should have an impact on the kwh model

-            # Replace the understores with hyphens
-            simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
-            simulation_epc.update(phase_epc_transformation)
-            self.simulation_epcs[phase] = simulation_epc
+            sim_epc.update(
+                {
+                    # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
+                    # per year, we multiply by 1000 to get kg/m²
+                    "co2-emiss-curr-per-floor-area": round(
+                        1000 * (rec_impact["carbon"] / self.data["total-floor-area"])
+                    ),
+                    "co2-emissions-current": rec_impact["carbon"],
+                    "current-energy-rating": sap_to_epc(rec_impact["sap"]),
+                    "current-energy-efficiency": int(np.floor(rec_impact["sap"])),
+                    "energy-consumption-current": rec_impact["heat_demand"],
+                    "id": "+".join([str(self.id), rec_id])
+                }
+            )
+            updated_simulation_epcs.append(sim_epc)
+
+        # Now that we have this data, we store it on the property
+        self.updated_simulation_epcs = updated_simulation_epcs
+
+        return updated_simulation_epcs

    @staticmethod
    def create_recommendation_scoring_data(
@ -425,7 +480,6 @@ class Property:
        """

        output = recommendation_record.copy()
-        non_invasive_recommendations = [] if non_invasive_recommendations is None else non_invasive_recommendations

        for col in [
            "walls_insulation_thickness",
@ -438,81 +492,6 @@ class Property:
        for recommendation in recommendations:
            # For the list of recommendations we have, we iteratively update the output

-            # Update description to indicate it's insulate
-            if recommendation["type"] in [
-                "solid_floor_insulation",
-                "suspended_floor_insulation",
-                "exposed_floor_insulation",
-            ]:
-                if len(recommendation["parts"]) > 1:
-                    raise NotImplementedError(
-                        "Have more than 1 floor insulation part - handle this case"
-                    )
-
-                # We don't really see above average for this in the training data
-                output["floor_insulation_thickness_ending"] = "average"
-            else:
-                if output["floor_thermal_transmittance_ending"] is None:
-                    raise ValueError("We should not have a None value for the u value")
-
-                if output["floor_insulation_thickness_ending"] is None:
-                    output["floor_insulation_thickness_ending"] = "none"
-
-            if recommendation["type"] in [
-                "loft_insulation",
-                "room_roof_insulation",
-                "flat_roof_insulation",
-            ]:
-                output["roof_thermal_transmittance_ending"] = recommendation[
-                    "new_u_value"
-                ]
-
-                parts = recommendation["parts"]
-                if len(parts) != 1:
-                    raise ValueError(
-                        "More than one part for roof insulation - investiage me"
-                    )
-
-                # This is based on the values we have in the training data
-                valid_numeric_values = [
-                    12,
-                    25,
-                    50,
-                    75,
-                    100,
-                    150,
-                    200,
-                    250,
-                    270,
-                    300,
-                    350,
-                    400,
-                ]
-
-                proposed_depth = recommendation["new_thickness"]
-                if proposed_depth not in valid_numeric_values:
-                    # Take the nearest value for scoring
-                    proposed_depth = min(
-                        valid_numeric_values, key=lambda x: abs(x - proposed_depth)
-                    )
-
-                output["roof_insulation_thickness_ending"] = str(int(proposed_depth))
-                if recommendation["type"] == "loft_insulation":
-                    if proposed_depth >= 270:
-                        output["roof_energy_eff_ending"] = "Very Good"
-                    else:
-                        if output["roof_energy_eff_ending"] not in ["Good", "Very Good"]:
-                            output["roof_energy_eff_ending"] = "Good"
-                else:
-                    output["roof_energy_eff_ending"] = "Very Good"
-            else:
-                # Fill missing roof u-values - this fill is not based on recommended upgrades
-                if output["roof_thermal_transmittance_ending"] is None:
-                    raise ValueError("We should not have a None value for the u value")
-
-                if output["roof_insulation_thickness_ending"] is None:
-                    output["roof_insulation_thickness_ending"] = "none"
-
            if recommendation["type"] == "sealing_open_fireplace":
                output["number_open_fireplaces_ending"] = 0

@ -556,13 +535,15 @@ class Property:
            if recommendation["type"] in [
                "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating",
                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
-                "cylinder_thermostat"
+                "cylinder_thermostat", "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
+                "solid_floor_insulation", "suspended_floor_insulation", "mixed_glazing"
            ]:
                # We update the data, as defined in the recommendaton
-                if output["walls_insulation_thickness_ending"] is None:
-                    output["walls_insulation_thickness_ending"] = "none"
+                for prefix in ["walls", "roof", "floor"]:
+                    if output[f"{prefix}_insulation_thickness_ending"] is None:
+                        output[f"{prefix}_insulation_thickness_ending"] = "none"

-                simulation_config = recommendation["simulation_config"]
+                simulation_config = recommendation["simulation_config"].copy()
                # If any entries in simulation_config are None, we will set them to "Unknown" which is the cleaning
                # value
                for key, value in simulation_config.items():
@ -578,9 +559,9 @@ class Property:
                "sealing_open_fireplace", "low_energy_lighting",
                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
                "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
-                "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
+                "solid_floor_insulation", "suspended_floor_insulation",
                "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
-                "heating_control", "secondary_heating", "cylinder_thermostat"
+                "heating_control", "secondary_heating", "cylinder_thermostat", "mixed_glazing"
            ]:
                raise NotImplementedError(
                    "Implement me, given type %s" % recommendation["type"]
@ -590,23 +571,18 @@ class Property:

        return output

-    def get_components(
+    def set_features(
        self,
        cleaned,
-        photo_supply_lookup,
-        floor_area_decile_thresholds,
-        energy_consumption_client
+        kwh_client,
+        kwh_predictions
    ):
        """
        Given the cleaning that has been performed, we'll use this to identify the property
        components, from roof to walls to windows, heating and hot water
        :param cleaned: This is the dictionary of components found in cleaner.cleaned
-        :param photo_supply_lookup: This is the lookup table for the photo supply, used to estimate the percentage
-                                    of the roof that is suitable for solar panels
-        :param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
-                                             solar pv roof area
-        :param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current
-                                        energy annual consumption in kWh
+        :param kwh_client: The client that will be used to convert the energy costs to today's costs
+        :param kwh_predictions: Contains the kwh predictions for heating and hot water
        :return:
        """

@ -669,21 +645,22 @@ class Property:
        self.set_floor_type()
        self.set_floor_level()
        self.set_windows_count()
-        self.set_solar_panel_area(
-            photo_supply_lookup=photo_supply_lookup,
-            floor_area_decile_thresholds=floor_area_decile_thresholds,
-        )
        self.set_energy_source()
        self.find_energy_sources()
-        self.set_current_energy_bill(energy_consumption_client)
+        self.set_current_energy_bill(kwh_client, kwh_predictions)

-    def set_solar_panel_configuration(self, solar_panel_configuration):
+    def set_solar_panel_configuration(
+        self, solar_panel_configuration, roof_area
+    ):
        """
        This funtion inserts the solar panel configuration into the property object
        """
        self.solar_panel_configuration = solar_panel_configuration

-    def set_current_energy_bill(self, energy_consumption_client):
+        # We also set the roof area
+        self.roof_area = roof_area
+
+    def set_current_energy_bill(self, kwh_client, kwh_predictions):
        """
        Given what we know about the property now, estimates the current energy consumption using the UCL paper
        https://www.sciencedirect.com/science/article/pii/S0378778823002542
@ -695,103 +672,58 @@ class Property:
        # 2) Predicted KwH

        # Today's costs
-        todays_heating_cost = energy_consumption_client.convert_cost_to_today(
-            original_cost=float(self.data["heating-cost-current"]),
-            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
-        )
-        todays_hot_water_cost = energy_consumption_client.convert_cost_to_today(
-            original_cost=float(self.data["hot-water-cost-current"]),
-            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
-        )
-        todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
+        todays_lighting_cost = kwh_client.convert_cost_to_today(
            original_cost=float(self.data["lighting-cost-current"]),
-            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
        )

-        scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
-        # Change columns from underscores to hyphens
-        scoring_df.columns = [
-            x.lower().replace("_", "-") for x in scoring_df.columns
-        ]
-        for col in ["heating_kwh", "hot_water_kwh"]:
-            scoring_df[col] = None
+        # If we have the kwh figures, we don't need to predict them
+        condition_data = self.energy_assessment_condition_data.copy()

-        energy_consumption_client.data = None
-        heating_prediction = energy_consumption_client.score_new_data(
-            new_data=scoring_df, target="heating_kwh"
-        )[0]
+        heating_kwh_predictions = kwh_predictions["heating_kwh_predictions"]
+        hotwater_kwh_predictions = kwh_predictions["hotwater_kwh_predictions"]

-        hot_water_prediction = energy_consumption_client.score_new_data(
-            new_data=scoring_df, target="hot_water_kwh"
-        )[0]
+        heating_prediction = (
+            condition_data.get("space_heating_kwh") if condition_data.get("space_heating_kwh") is not None else
+            heating_kwh_predictions[
+                heating_kwh_predictions["id"].astype(int) == self.uprn
+                ]["predictions"].values[0]
+        )
+
+        hot_water_prediction = (
+            condition_data.get("water_heating_kwh") if condition_data.get("water_heating_kwh") is not None else
+            hotwater_kwh_predictions[
+                hotwater_kwh_predictions["id"].astype(int) == self.uprn
+                ]["predictions"].values[0]
+        )

        # We convert the lighting cost into kwh, just using the price cap
-        lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        lighting_kwh = todays_lighting_cost / AnnualBillSavings.ELECTRICITY_PRICE_CAP

        appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)

-        adjusted_heating_kwh = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=heating_prediction,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
+        unadjusted_kwh_estimates = {
+            "heating": float(heating_prediction),
+            "hot_water": float(hot_water_prediction),
+            "lighting": float(lighting_kwh),
+            "appliances": float(appliances_kwh)
+        }

-        adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=hot_water_prediction,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=lighting_kwh,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=appliances_kwh,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        # Adjust today's cost figures with the UCL model
-        adjusted_heating_cost = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=todays_heating_cost,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=todays_hot_water_cost,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_lighting_cost = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=todays_lighting_cost,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_appliances_cost = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
+        unadjusted_heating_costs = {
+            "heating": None,
+            "hot_water": None,
+            "lighting": float(todays_lighting_cost),
+            "appliances": float(appliances_kwh) * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        }

        # Sum up the adjusted kwh figures
-        self.current_adjusted_energy = (
-            adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
-        )
-        self.current_energy_bill = (
-            adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
+        self.current_energy_consumption = sum(list(unadjusted_kwh_estimates.values()))
+        self.current_energy_consumption_heating_hotwater = (
+            unadjusted_kwh_estimates["heating"] + unadjusted_kwh_estimates["hot_water"]
        )

        self.energy_cost_estimates = {
-            "adjusted": {
-                "heating": adjusted_heating_cost,
-                "hot_water": adjusted_hot_water_cost,
-                "lighting": adjusted_lighting_cost,
-                "appliances": adjusted_appliances_cost
-            },
-            "unadjusted": {
-                "heating": todays_heating_cost,
-                "hot_water": todays_hot_water_cost,
-                "lighting": todays_lighting_cost,
-                "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-            },
+            "unadjusted": unadjusted_heating_costs,
            "epc": {
                "heating": float(self.data["heating-cost-current"]),
                "hot_water": float(self.data["hot-water-cost-current"]),
@ -800,18 +732,7 @@ class Property:
        }

        self.energy_consumption_estimates = {
-            "adjusted": {
-                "heating": adjusted_heating_kwh,
-                "hot_water": adjusted_hot_water_kwh,
-                "lighting": adjusted_lighting_kwh,
-                "appliances": adjusted_applicances_kwh
-            },
-            "unadjusted": {
-                "heating": heating_prediction,
-                "hot_water": hot_water_prediction,
-                "lighting": lighting_kwh,
-                "appliances": appliances_kwh
-            }
+            "unadjusted": unadjusted_kwh_estimates
        }

    def set_spatial(self, spatial: pd.DataFrame):
@ -861,7 +782,10 @@ class Property:
        property_data = {
            "creation_status": "READY",
            "uprn": int(self.data["uprn"]),
-            "building_reference_number": int(self.data["building-reference-number"]),
+            "building_reference_number": (
+                int(self.data["building-reference-number"]) if
+                self.data["building-reference-number"] is not None else None
+            ),
            "has_pre_condition_report": True,
            "has_recommendations": True,
            "property_type": self.data["property-type"],
@ -947,7 +871,8 @@ class Property:
            "energy_tariff": self.data["energy-tariff"],
            "primary_energy_consumption": self.energy["primary_energy_consumption"],
            "co2_emissions": self.energy["co2_emissions"],
-            "adjusted_energy_consumption": self.current_adjusted_energy,
+            "current_energy_demand": self.current_energy_consumption,
+            "current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater,
            "estimated": self.data.get("estimated", False),
        }

@ -1030,27 +955,40 @@ class Property:
        medians across the EPC data
        :return:
        """
+        # Many of these pieces of information are now contained in the condition data
+        condition_data = self.energy_assessment_condition_data.copy()

-        # TODO: These functions should work on an EPCRecord object, so that the format is more standardised.
-        #       They could also be added as attributes to the EPC Record
+        # We can update the number of floors if we have this information in the condition data
+        self.number_of_floors = int(self.energy_assessment_condition_data["number_of_floors"]) \
+            if (condition_data.get("number_of_floors") is not None) and (self.number_of_floors is not None) \
+            else self.number_of_floors

-        self.perimeter = estimate_perimeter(
-            self.floor_area / self.number_of_floors,
-            self.number_of_rooms / self.number_of_floors,
-        )
+        # If we already have this, we re-engineer the perimeter
+        if self.insulation_floor_area is not None:
+            self.perimeter = np.sqrt(self.insulation_floor_area) * 4
+        else:
+            self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \
+                if condition_data.get("perimeter") is not None \
+                else estimate_perimeter(
+                floor_area=self.floor_area / self.number_of_floors,
+                num_rooms=self.number_of_rooms / self.number_of_floors
+            )

-        self.insulation_wall_area = estimate_external_wall_area(
+        self.insulation_wall_area = float(self.energy_assessment_condition_data["insulation_wall_area"]) \
+            if (condition_data.get("insulation_wall_area") is not None) and (self.insulation_wall_area is not None) \
+            else estimate_external_wall_area(
            num_floors=self.number_of_floors,
            floor_height=self.floor_height,
            perimeter=self.perimeter,
            built_form=self.data["built-form"],
        )

-        self.insulation_floor_area = self.floor_area / self.number_of_floors
-
-        self.pitched_roof_area = esimtate_pitched_roof_area(
-            floor_area=self.insulation_floor_area, floor_height=self.floor_height
-        )
+        if self.insulation_floor_area is None:
+            self.insulation_floor_area = float(
+                self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]
+            ) if (condition_data.get("main_dwelling_ground_floor_area") is not None) else (
+                self.floor_area / self.number_of_floors
+            )

    def set_floor_level(self):
        self.floor_level = (
@ -1132,23 +1070,17 @@ class Property:

        return component_data

-    def set_adjusted_energy(
-        self, expected_adjusted_energy, expected_energy_bill
-    ):
-        """
-        Stores these values for usage later
-        """
-
-        self.expected_adjusted_energy = expected_adjusted_energy
-        self.expected_energy_bill = expected_energy_bill
-
    def set_windows_count(self):
        """
        Using the estimate_windows function, this method will set the number of windows in the property
        :return:
        """

-        self.number_of_windows = estimate_windows(
+        condition_data = self.energy_assessment_condition_data.copy()
+
+        self.number_of_windows = int(condition_data["number_of_windows"]) \
+            if condition_data.get("number_of_windows") is not None \
+            else estimate_windows(
            property_type=self.data["property-type"],
            built_form=self.data["built-form"],
            construction_age_band=self.construction_age_band,
@ -1156,47 +1088,9 @@ class Property:
            number_habitable_rooms=self.number_of_rooms,
        )

-    def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
-        """
-        Sets the approximate area of the solar panels
-        :return:
-        """
-
-        if (self.insulation_floor_area is None) and (self.pitched_roof_area is None):
-            raise ValueError(
-                "Need to set insulation floor area and pitched roof area before setting solar pv roof area"
-            )
-
-        photo_supply_matched = SolarPhotoSupply.filter_photo_supply_lookup(
-            photo_supply_lookup=photo_supply_lookup,
-            floor_area_decile_thresholds=floor_area_decile_thresholds,
-            tenure=self.data["tenure"],
-            built_form=self.data["built-form"],
-            property_type=self.data["property-type"],
-            construction_age_band=self.construction_age_band,
-            is_flat=self.roof["is_flat"],
-            is_pitched=self.roof["is_pitched"],
-            is_roof_room=self.roof["is_roof_room"],
-            floor_area=self.floor_area,
-        )
-
-        percentage_of_roof = photo_supply_matched["photo_supply_median"].mean()
-        percentage_of_roof = percentage_of_roof / 100
-
-        self.solar_pv_percentage = percentage_of_roof
-
-    def get_solar_pv_roof_area(self, percentage_of_roof):
-        """
-        Given a percentage of the roof, this method will return the estimated area of the solar panels
-        :param percentage_of_roof:
-        :return:
-        """
-
-        return (
-            self.insulation_floor_area * percentage_of_roof
-            if self.roof["is_flat"]
-            else self.pitched_roof_area * percentage_of_roof
-        )
+        self.windows_area = float(condition_data["windows_area"]) \
+            if condition_data.get("windows_area") is not None \
+            else None

    def set_energy_source(self):
        """
@ -1240,7 +1134,9 @@ class Property:
            'has_exhaust_source_heat_pump': 'Electricity',
            'has_community_heat_pump': 'Electricity',
            'has_wood_pellets': 'Wood Pellets',
-            'has_community_scheme': 'Varied (Community Scheme)'
+            'has_community_scheme': 'Varied (Community Scheme)',
+            "has_dual_fuel_mineral_and_wood": 'Wood Logs',
+            "has_electricaire": 'Electricity',
        }

        # Hot water
@ -1266,19 +1162,144 @@ class Property:
            'community scheme': 'Community Scheme'
        }

-        self.heating_energy_source = [
+        self.heating_energy_source = list({
            fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
-        ]
+        })
+
+        if set(self.heating_energy_source) == {'Electricity', 'Natural Gas'}:
+            # It means they have mixed heating so we take the primary one, based on main fuel
+            # This will probably happen in the case of an extension
+            if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
+                self.heating_energy_source = ['Natural Gas']
+            else:
+                self.heating_energy_source = ['Electricity']
+
+        if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}:
+            # It means they have mixed heating so we take the primary one, based on main fuel
+            # This will probably happen in the case of an extension
+            if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
+                self.heating_energy_source = ['Natural Gas']
+            else:
+                self.heating_energy_source = ['Wood Logs']
+
        if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
-            raise Exception("Investigate em")
+            raise Exception("Investigate me")

        self.heating_energy_source = self.heating_energy_source[0]

+        if self.heating_energy_source == "Varied (Community Scheme)":
+            if self.main_fuel["fuel_type"] == "mains gas":
+                self.heating_energy_source = "Natural Gas (Community Scheme)"
+            else:
+                raise Exception("Implement me")
+
        if self.hotwater["heater_type"] is not None:
            self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
+
+            if self.hotwater["extra_features"] == "plus solar":
+                self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
+                return
+
        else:
            fuel = system_type_modification[self.hotwater["system_type"]]
-            if fuel == 'Main System':
+
+            if self.hotwater["extra_features"] == "plus solar":
+                self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
+                return
+
+            if fuel in ['Main System', "Community Scheme"]:
                self.hot_water_energy_source = self.heating_energy_source
+            elif fuel in ['Secondary System']:
+                # Check the secondary heating system
+                secondary_heating = self.data["secondheat-description"]
+                self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"]
            else:
                raise Exception("Investiage me")
+
+    def is_ashp_valid(self, measures):
+        """
+        Decides whether an air source heat pump (ASHP) can be considered for this property.
+
+        An ASHP listed in the non-invasive recommendations is always accepted. Otherwise the ASHP must be one of
+        the requested measures, the property must be a house or a bungalow, and its main heating must not already
+        be an air source heat pump.
+
+        :param measures: collection of measure names that are being considered
+        :return: True if an ASHP is a valid measure for this property, otherwise False
+        """
+
+        ashp_measure = "air_source_heat_pump"
+
+        # A non-invasive recommendation takes priority over every other check
+        if ashp_measure in self.non_invasive_recommendations:
+            return True
+
+        if ashp_measure not in measures:
+            return False
+
+        is_house_or_bungalow = self.data["property-type"] in ["House", "Bungalow"]
+        already_has_ashp = self.main_heating["has_air_source_heat_pump"]
+
+        if not is_house_or_bungalow:
+            return False
+
+        return not already_has_ashp
+
+    def is_solar_pv_valid(self):
+        """
+        Decides whether solar PV can be considered for this property.
+
+        A property that has a building id and a solar panel configuration is always accepted (this covers flats
+        that are assessed at building level). Otherwise the property must be a house, bungalow or maisonette, have
+        a flat, pitched or roof-room roof, and have no existing solar PV recorded in the photo-supply field.
+
+        :return: a truthy value if solar PV is a valid measure for this property, otherwise a falsy value
+        """
+
+        # If the property is a flat but we are looking at building solar potential, we can include this
+        if (self.building_id is not None) and (self.solar_panel_configuration is not None):
+            return True
+
+        is_valid_property_type = self.data["property-type"] in ["House", "Bungalow", "Maisonette"]
+        is_valid_roof_type = (
+            self.roof["is_flat"] or self.roof["is_pitched"] or self.roof["is_roof_room"]
+        )
+
+        # If there is no existing solar PV, the photo-supply field will be None, 0 or one of the anomaly values.
+        # The anomaly values are checked one by one: nesting the collection inside a list compares photo-supply
+        # against the collection as a whole, which never matches an individual anomaly value.
+        # NOTE(review): assumes self.DATA_ANOMALY_MATCHES is a collection of anomaly values - confirm
+        photo_supply = self.data["photo-supply"]
+        has_no_existing_solar_pv = (
+            photo_supply is None
+            or photo_supply == 0
+            or photo_supply in self.DATA_ANOMALY_MATCHES
+        )
+
+        return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv
+
+    def estimate_electrical_consumption(self, assumed_ashp_efficiency, exclusions):
+        """
+        Estimates the future electricity demand of the property, assuming that an eligible home moves its heating
+        and hot water onto an air source heat pump (ASHP).
+
+        The figure is used to size up solar panels, so that they can cover heat generation even if the property
+        does not generate its heat from electricity today. When the ASHP is excluded, or is not a valid measure
+        for the property (see is_ashp_valid), the current energy consumption is returned without any adjustment.
+
+        :param assumed_ashp_efficiency: assumed efficiency of the ASHP, expressed as a percentage (it is divided
+                                        by 100 before being applied)
+        :param exclusions: measures that must not be considered; None is treated as no exclusions
+        :return: the current energy consumption when no ASHP adjustment applies, otherwise the sum of the
+                 efficiency-adjusted heating and hot water estimates and the unadjusted lighting and appliances
+                 estimates
+        :raises NotImplementedError: if the heating or hot water energy source is not one we know how to remap
+        """
+
+        if exclusions is None:
+            exclusions = []
+
+        # No adjustment when the ASHP is excluded, or when the property cannot take one (e.g. it already has one)
+        ashp_excluded = "air_source_heat_pump" in exclusions
+        if ashp_excluded or not self.is_ashp_valid(measures=["air_source_heat_pump"]):
+            return self.current_energy_consumption
+
+        # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
+        remap_fuel_sources = [
+            "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
+            "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal"
+        ]
+
+        heating_source = self.heating_energy_source
+        hot_water_source = self.hot_water_energy_source
+        unadjusted = self.energy_consumption_estimates["unadjusted"]
+        heating_consumption = unadjusted["heating"]
+        hotwater_consumption = unadjusted["hot_water"]
+
+        supported_hot_water_sources = remap_fuel_sources + ["Electricity + Solar Thermal"]
+        if not (
+            (heating_source in remap_fuel_sources) and (hot_water_source in supported_hot_water_sources)
+        ):
+            raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
+
+        efficiency_ratio = assumed_ashp_efficiency / 100
+
+        # Past the guard above the heating source is always remappable, so heating is always adjusted
+        heating_consumption = heating_consumption / efficiency_ratio
+
+        # Hot water is only adjusted for remappable sources ("Electricity + Solar Thermal" is left as it is)
+        if hot_water_source in remap_fuel_sources:
+            hotwater_consumption = hotwater_consumption / efficiency_ratio
+
+        return (
+            heating_consumption +
+            hotwater_consumption +
+            unadjusted["lighting"] +
+            unadjusted["appliances"]
+        )
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@ -292,8 +292,7 @@ class SearchEpc:
                        "error": str(e)
                    }

-    @staticmethod
-    def filter_rows(rows, property_type=None, address=None):
+    def filter_rows(self, rows, property_type=None, address=None):
        """
        This method should not be used when property_type and address are both not None
        :param rows:
@ -321,8 +320,21 @@ class SearchEpc:

        if address is not None:
            # We can do a filter on the property type
-            best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
-            rows_filtered = [r for r in rows if r["address"] == best_match[0]]
+            # We check if the full address contains the postcode and, if it does, remove it
+            if self.postcode in address:
+                address = address.replace(self.postcode, "").strip().rstrip(",")
+
+            # We check if post town is included in the address
+            if any([r["posttown"].lower() in address.lower() for r in rows]):
+                best_match = process.extractOne(
+                    address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0
+                )
+                # Keep only the rows whose address and post town equal the best match
+                rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
+            else:
+                best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
+                # Keep only the rows whose address equals the best match
+                rows_filtered = [r for r in rows if r["address"] == best_match[0]]

            if rows_filtered:
                return rows_filtered
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@ -8,6 +8,8 @@ import time
 from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data
 from utils.logger import setup_logger
 from sklearn.preprocessing import MinMaxScaler
+from recommendations.Costs import Costs
+from math import sin, cos, sqrt, atan2, radians

 logger = setup_logger()

@ -15,10 +17,6 @@ logger = setup_logger()
 class GoogleSolarApi:
    NORTH_FACING_AZIMUTH_RANGE = (-30, 30)

-    # Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
-    # be exported
-    SOLAR_CONSUMPTION_PROPORTION = 0.5
-
    # These are variables, described in the documentation for cost analysis for non-us locations, seen here
    # https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
    # We use the default figures that the API uses for US locations
@ -69,6 +67,9 @@ class GoogleSolarApi:
        # Indicates if we need to store the data to the db
        self.need_to_store = False

+        # Indicates if we think we have both units attached to a semi-detached property
+        self.double_property = False
+
    def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None):
        """
        Make an API request to retrieve building insights based on the given longitude and latitude, with retry
@ -107,15 +108,24 @@ class GoogleSolarApi:

    @lru_cache(maxsize=128)
    def get(
-        self, longitude, latitude, energy_consumption, required_quality="MEDIUM", is_building=False, session=None,
-        uprn=None
+        self,
+        longitude,
+        latitude,
+        energy_consumption,
+        property_instance=None,
+        required_quality="MEDIUM",
+        is_building=False,
+        session=None,
+        uprn=None,
    ):
        """
        Wrapper function that calls get_building_insights and extracts roof segments, with caching.

        :param longitude: The longitude of the location.
        :param latitude: The latitude of the location.
-        :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude.
+        :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude,
+                                    that we wish to size the solar panels up against
+        :param property_instance: The property instance associated to the longitude and latitude.
        :param required_quality: The required quality of the data (default is "MEDIUM").
        :param is_building: Whether the energy consumption is for a building or a unit.
        :param session: The database session to use for the query (default is None).
@ -137,6 +147,14 @@ class GoogleSolarApi:

        # Extract key data from the insights response
        self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', [])
+        # Automatically exclude north-facing segments
+        self.exclude_north_facing_segments(property_instance=property_instance)
+        # If a property is semi-detached, it's possible for us to include segments from an attached unit
+        if (property_instance.data["built-form"] == "Semi-Detached") and (
+            property_instance.data["extension-count"] == 0
+        ):
+            self.exclude_likely_duplicate_surfaces()
+
        self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
        self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2']
        self.panel_area = (
@ -152,13 +170,17 @@ class GoogleSolarApi:
            # It should be straightforward, but I'd rather see an actual instance of this happening
            raise NotImplementedError("Panel wattage is not 400W - implement me")

-        # Automatically exclude north-facing segments
-        self.exclude_north_facing_segments()
-
        self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments]

        # We now start finding the solar panel configurations
-        self.optimise_solar_configuration(energy_consumption=energy_consumption, is_building=is_building)
+        self.optimise_solar_configuration(
+            energy_consumption=energy_consumption, is_building=is_building, property_instance=property_instance
+        )
+
+        # Finally, if we have a double property, we halve the roof and floor areas we stored
+        if self.double_property:
+            self.roof_area = self.roof_area / 2
+            self.floor_area = self.floor_area / 2

    def save_to_db(self, session, uprns_to_location, scenario_type):
        if self.insights_data is None:
@ -178,7 +200,7 @@ class GoogleSolarApi:
                "yearly_dc_energy",
                "total_cost",
                "panneled_roof_area",
-                "array_warrage",
+                "array_wattage",
                "initial_ac_kwh_per_year",
                "lifetime_ac_kwh",
                "roi",
@ -191,7 +213,7 @@ class GoogleSolarApi:
                "yearly_dc_energy": "yearly_dc_kwh",
                "total_cost": "cost",
                "panneled_roof_area": "panelled_roof_area",
-                "array_warrage": "array_kwhp",
+                "array_wattage": "array_kwhp",
                "initial_ac_kwh_per_year": "yearly_ac_kwh",
            }
        )
@ -226,15 +248,17 @@ class GoogleSolarApi:
                installation_life_span)) /
            (1 - efficiency_depreciation_factor))

-    def optimise_solar_configuration(self, energy_consumption, is_building=False):
+    def optimise_solar_configuration(self, energy_consumption, is_building=False, property_instance=None):
        """
        Optimise the solar panel configuration for the building.
        :return:
        """

+        cost_instance = Costs(property_instance=property_instance) if property_instance is not None else None
+
        # Remove any north facing roof segments
        panel_performance = []
-        for config in self.insights_data["solarPotential"]["solarPanelConfigs"]:
+        for config in self.insights_data["solarPotential"].get("solarPanelConfigs", []):
            roof_segment_summaries = config["roofSegmentSummaries"]
            # Filter on just the segments in self.roof_segment_indexes
            roof_segment_summaries = [
@ -246,7 +270,14 @@ class GoogleSolarApi:
                wattage = segment["panelsCount"] * self.insights_data["solarPotential"]["panelCapacityWatts"]
                generated_dc_energy = segment["yearlyEnergyDcKwh"]
                ratio = generated_dc_energy / wattage
-                cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
+
+                if cost_instance is None:
+                    cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
+                else:
+                    cost = cost_instance.solar_pv(
+                        wattage=wattage, has_battery=False
+                    )["total"]
+
                roi_summary.append(
                    {
                        "segmentIndex": segment["segmentIndex"],
@ -260,6 +291,8 @@ class GoogleSolarApi:
                )

            roi_summary = pd.DataFrame(roi_summary)
+            if roi_summary.empty:
+                continue

            weighted_ratio = np.average(
                roi_summary["ratio"].values, weights=roi_summary["generated_dc_energy"].values
@ -274,23 +307,59 @@ class GoogleSolarApi:
                    "total_cost": total_cost,
                    "weighted_ratio": weighted_ratio,
                    "panneled_roof_area": roi_summary["panneled_roof_area"].sum(),
-                    "array_warrage": roi_summary["n_panels"].sum() * self.panel_wattage
+                    "array_wattage": roi_summary["n_panels"].sum() * self.panel_wattage
                }
            )

        panel_performance = pd.DataFrame(panel_performance)
-        # We can have duplicate configurations
+
+        if panel_performance.empty:
+            self.panel_performance = pd.DataFrame(
+                columns=[
+                    "n_panels",
+                    "yearly_dc_energy",
+                    "total_cost",
+                    "panneled_roof_area",
+                    "array_wattage",
+                    "initial_ac_kwh_per_year",
+                    "lifetime_ac_kwh",
+                    "roi",
+                    "expected_payback_years",
+                    "lifetime_dc_kwh"
+                ]
+            )
+            return
+
+        # We can have duplicate configurations
+
        panel_performance = panel_performance.drop_duplicates()
        # If we look at the building level, we don't include any projects fewer than 10 panels, otherwise the
        # minimum is 4
        min_panels = 10 if is_building else 4
        panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels]

+        if panel_performance.empty:
+            self.panel_performance = pd.DataFrame(
+                columns=[
+                    "n_panels",
+                    "yearly_dc_energy",
+                    "total_cost",
+                    "panneled_roof_area",
+                    "array_wattage",
+                    "initial_ac_kwh_per_year",
+                    "lifetime_ac_kwh",
+                    "roi",
+                    "expected_payback_years",
+                    "lifetime_dc_kwh"
+                ]
+            )
+            return
+
        panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate

        # Remove anything where the total ac energy is less than half of the array wattage
        panel_performance = panel_performance[
-            (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5
+            (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_wattage"]) >= 0.5
            ]

        # 2) Calculate the liftime solar energy production
@ -311,12 +380,25 @@ class GoogleSolarApi:
        )

        # Now that we know the lifetime cnsumption of ac kwh, we can estimate the roi
-        lifetime_energy_consumption = energy_consumption * self.installation_life_span
+        # Key things we estimate:
+        # - generation_value: this is the gbp value of the electricity generated
+        # - roi: the return on investment, calculated as generation_value / total_cost
+        # - surplus: this is the amount of additional energy generated, and therefore how much will be exported
+        # - surplus_value: the value of the surplus energy - this feeds into generation_value, when relevant
+        # - expected_payback_years: the number of years it will take to pay back the initial investment
+
+        # If we have a double property (i.e. the solar api has returned data for two units) we size up the solar panels
+        # for double the consumption, as if for two units.
+        if self.double_property:
+            lifetime_energy_consumption = energy_consumption * 2 * self.installation_life_span
+        else:
+            lifetime_energy_consumption = energy_consumption * self.installation_life_span
        roi_results = []
        for _, panel_config in panel_performance.iterrows():
            lifetime_ac_kwh = panel_config["lifetime_ac_kwh"]

            surplus = 0
+            generation_deficit = 0
            if lifetime_ac_kwh < lifetime_energy_consumption:
                # We estimate the amount of electricity generated, based on the price cap
                generation_value = lifetime_ac_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
@ -329,7 +411,6 @@ class GoogleSolarApi:
                surplus_value = surplus * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
                generation_value = lifetime_energy_consumption * AnnualBillSavings.ELECTRICITY_PRICE_CAP
                roi = (generation_value + surplus_value) / panel_config["total_cost"]
-                generation_deficit = surplus_value

            # Calculate expected payback years
            if generation_value > 0:
@ -381,9 +462,34 @@ class GoogleSolarApi:

        panel_performance["expected_payback_years"] = np.ceil(panel_performance["expected_payback_years"]).astype(int)

+        if self.double_property:
+            # Now that we've optimised for an energy consumption that is double the original, we need to halve the
+            # results
+            panel_performance["n_panels_halved"] = panel_performance["n_panels"] / 2
+            n_panels_required = {int(x) for x in np.floor(panel_performance["n_panels"] / 2)}
+            # We filter the data on this number of panels
+            panel_performance = panel_performance[panel_performance["n_panels_halved"].isin(n_panels_required)]
+            # We halve the generation values
+            for col in [
+                "yearly_dc_energy",
+                "total_cost",
+                "panneled_roof_area",
+                "array_wattage",
+                "initial_ac_kwh_per_year",
+                "lifetime_ac_kwh",
+                "lifetime_dc_kwh",
+                "generation_value",
+                "generation_deficit",
+                "surplus"
+            ]:
+                panel_performance[col] = panel_performance[col] / 2
+
+            panel_performance["n_panels"] = panel_performance["n_panels_halved"]
+            panel_performance = panel_performance.drop(columns=["n_panels_halved"])
+
        self.panel_performance = panel_performance

-    def exclude_north_facing_segments(self):
+    def exclude_north_facing_segments(self, property_instance):
        """
        Filter out any north-facing roof segments from the roof_segments attribute.

@ -394,9 +500,86 @@ class GoogleSolarApi:
        for segment_index, segment in enumerate(self.roof_segments):
            segment["segmentIndex"] = segment_index
            # Check if the segment is north-facing
-            if self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]:
+            if (
+                self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]
+            ) and not property_instance.roof["is_flat"]:
                continue

            filtered_segments.append(segment)

        self.roof_segments = filtered_segments
+
+    @staticmethod
+    def haversine(lat1, lon1, lat2, lon2):
+        """
+        Great-circle distance between two points on the Earth, computed with the haversine formula.
+
+        :param lat1: latitude of the first point, in decimal degrees
+        :param lon1: longitude of the first point, in decimal degrees
+        :param lat2: latitude of the second point, in decimal degrees
+        :param lon2: longitude of the second point, in decimal degrees
+        :return: the distance between the two points in kilometres
+        """
+        earth_radius_km = 6373.0  # approximate radius of earth in km
+
+        # The trigonometric functions work in radians
+        phi1, lambda1, phi2, lambda2 = (radians(value) for value in (lat1, lon1, lat2, lon2))
+
+        delta_lambda = lambda2 - lambda1
+        delta_phi = phi2 - phi1
+
+        a = sin(delta_phi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(delta_lambda / 2) ** 2
+        central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))
+
+        return earth_radius_km * central_angle
+
+    def exclude_likely_duplicate_surfaces(self):
+        """
+        Detects whether the roof segments look like they cover two attached units rather than a single one.
+
+        Segments that face a similar direction (azimuth within a tolerance) are treated as likely duplicates, and
+        of those only the segment closest to the property center is kept. If this leaves exactly half of the
+        original segments, double_property is set to True so that the solar analysis can be performed for both
+        units and the results halved afterwards. self.roof_segments itself is not modified by this method.
+        :return:
+        """
+
+        def is_similar(segment1, segment2, azimuth_tol=20):
+            # Azimuths are compass bearings, so the difference has to wrap around north (355 vs 5 is 10 degrees)
+            azimuth_diff = abs(segment1['azimuthDegrees'] - segment2['azimuthDegrees']) % 360
+            return min(azimuth_diff, 360 - azimuth_diff) <= azimuth_tol
+
+        property_center = self.insights_data["center"]
+
+        def distance_to_center(segment):
+            return self.haversine(
+                property_center['latitude'], property_center['longitude'],
+                segment['center']['latitude'], segment['center']['longitude']
+            )
+
+        deduped_segments = []
+        for segment in self.roof_segments:
+            similar_segments = [s for s in deduped_segments if is_similar(segment, s)]
+            if not similar_segments:
+                deduped_segments.append(segment)
+                continue
+
+            # Compare distances to the property center and keep the closer segment
+            current_dist = distance_to_center(segment)
+            further_segments = [s for s in similar_segments if current_dist < distance_to_center(s)]
+            if not further_segments:
+                # Every similar segment we already hold is at least as close, so this one is the duplicate
+                continue
+
+            # The segment replaces every similar segment that is further away. It is added exactly once, even
+            # when it beats several of them, so that the count used by the check below is not inflated
+            deduped_segments = [
+                s for s in deduped_segments if not any(s is further for further in further_segments)
+            ]
+            deduped_segments.append(segment)
+
+        # If we have a semi-detached property that has duplicated segments, we should expect to halve the number of
+        # segments
+        if len(deduped_segments) < len(self.roof_segments):
+            if len(deduped_segments) != len(self.roof_segments) / 2:
+                # We don't perform any dropping in this case
+                return
+
+            # Because the segments are duplicated, but the sizes aren't necessarily split perfectly in half, what
+            # we need to do is perform the solar analysis and then halve the results. We set an indicator which
+            # implies we should do this
+            self.double_property = True
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@ -0,0 +1,44 @@
+# Assumes that the average efficiency of an air source heat pump is 300%, taking the median of the 200-400% range,
+# which is often quoted as a sensible efficiency range for air source heat pumps.
+PESSIMISTIC_ASHP_EFFICIENCY = 200
+AVERAGE_ASHP_EFFICIENCY = 300
+
+# Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
+# be exported
+SOLAR_CONSUMPTION_PROPORTION = 0.5
+
+# Maps a heating / hot water system description to the fuel it uses and an assumed efficiency ("cop").
+# "cop" is a multiplier rather than a percentage (1 means 100% efficient), which is why the heat pump entries divide
+# the percentage constant above by 100.
+# NOTE(review): the keys look like EPC mainheat / hotwater description strings and are matched verbatim - confirm
+# against the caller how descriptions missing from this table are handled.
+DESCRIPTIONS_TO_FUEL_TYPES = {
+    "Air source heat pump, radiators, electric": {
+        "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
+    },
+    "Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    'Electric storage heaters': {"fuel": 'Electricity', "cop": 1},
+    "Electric immersion, off-peak": {"fuel": 'Electricity', "cop": 1},
+    "Electric storage heaters, radiators": {"fuel": 'Electricity', "cop": 1},
+    "Room heaters, electric": {"fuel": 'Electricity', "cop": 1},
+    "Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
+    "Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
+    "Boiler and radiators, LPG": {"fuel": 'LPG', "cop": 0.9},
+    "Room heaters, dual fuel (mineral and wood)": {"fuel": 'Wood Logs', "cop": 1},
+    "Room heaters, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    "Warm air, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    "Boiler, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    "Gas multipoint": {"fuel": "Natural Gas", "cop": 0.9},
+    "Warm air, Electricaire": {"fuel": "Electricity", "cop": 1},
+    "Gas boiler/circulator": {"fuel": "Natural Gas", "cop": 0.9},
+    "Boiler and underfloor heating, mains gas": {"fuel": "Natural Gas", "cop": 0.9},
+    "No system present: electric heaters assumed": {"fuel": "Electricity", "cop": 1},
+    "Electric instantaneous at point of use": {"fuel": "Electricity", "cop": 1},
+    "Boiler and radiators, oil": {"fuel": "Oil", "cop": 0.9},
+    "Electric storage heaters, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
+    "Boiler and radiators, electric": {"fuel": "Electricity", "cop": 0.9},
+    "Gas boiler/circulator, no cylinder thermostat": {"fuel": "Natural Gas", "cop": 0.9},
+    "Boiler and radiators, dual fuel (mineral and wood)": {"fuel": "Wood Logs", "cop": 0.9},
+    "Electric immersion, standard tariff, plus solar": {"fuel": "Electricity + Solar Thermal", "cop": 1},
+    "From main system, flue gas heat recovery": {"fuel": "Natural Gas", "cop": 0.9},
+    "Electric underfloor heating": {"fuel": "Electricity", "cop": 1},
+    "No system present: electric immersion assumed": {"fuel": "Electricity", "cop": 1},
+    "Air source heat pump, underfloor, electric": {
+        "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
+    },
+}
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -30,6 +30,11 @@ class Settings(BaseSettings):
    LIGHTING_COST_PREDICTIONS_BUCKET: str
    HEATING_COST_PREDICTIONS_BUCKET: str
    HOT_WATER_COST_PREDICTIONS_BUCKET: str
+    HEATING_KWH_PREDICTIONS_BUCKET: str
+    HOTWATER_KWH_PREDICTIONS_BUCKET: str
+
+    # Other S3 buckets
+    ENERGY_ASSESSMENTS_BUCKET: str

    class Config:
        env_file = "backend/.env"
@ -48,5 +53,7 @@ def get_prediction_buckets():
        "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
        "lighting_cost_predictions": get_settings().LIGHTING_COST_PREDICTIONS_BUCKET,
        "heating_cost_predictions": get_settings().HEATING_COST_PREDICTIONS_BUCKET,
-        "hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET
+        "hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET,
+        "heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET,
+        "hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET,
    }
--- a/backend/app/db/functions/energy_assessment_functions.py
+++ b/backend/app/db/functions/energy_assessment_functions.py
@ -0,0 +1,158 @@
+from backend.app.db.models.energy_assessments import (
+    EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments, DocumentTypeEnum
+)
+from sqlalchemy.orm import Session
+from sqlalchemy.exc import IntegrityError
+from typing import Optional, List, Dict
+from sqlalchemy import desc
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def bulk_insert_energy_assessments(session: Session, data_list: List[dict]) -> Dict[int, int]:
+    """
+    This function inserts or updates multiple energy assessment records into the database and returns a mapping of
+    uprn to energy_assessment_id.
+
+    A record is matched on (uprn, inspection_date): when one exists, its attributes are overwritten with the supplied
+    values, otherwise a new record is inserted. All changes are committed together in a single transaction.
+
+    :param session: The SQLAlchemy session.
+    :param data_list: A list of dictionaries containing energy assessment data. 'uprn' and 'inspection_date' are
+        read from each dictionary to find an existing record; the whole dictionary is applied to the model.
+    :return: A dictionary mapping each uprn to its corresponding energy_assessment_id. If the transaction fails with
+        an IntegrityError it is rolled back and an empty dictionary is returned, because the ids collected up to
+        that point would otherwise refer to rows that were never persisted.
+    """
+    uprn_to_assessment_id = {}
+
+    try:
+        for data in data_list:
+            uprn = data.get('uprn')
+            inspection_date = data.get('inspection_date')
+
+            # Check if a record with the same uprn and inspection_date exists
+            existing_record = session.query(EnergyAssessment).filter_by(
+                uprn=uprn,
+                inspection_date=inspection_date
+            ).first()
+
+            if existing_record:
+                # Update the existing record with new data. The record is already attached to the session, so the
+                # changes are picked up on flush without adding it again
+                for key, value in data.items():
+                    setattr(existing_record, key, value)
+
+                # Map the uprn to the existing record's ID
+                uprn_to_assessment_id[uprn] = existing_record.id
+            else:
+                # Insert a new record
+                new_assessment = EnergyAssessment(**data)
+                session.add(new_assessment)
+
+                # Flush the session to get the newly created ID before commit
+                session.flush()
+
+                # Map the uprn to the new record's ID
+                uprn_to_assessment_id[uprn] = new_assessment.id
+
+        # Commit the transaction
+        session.commit()
+        logger.info("All records inserted or updated successfully.")
+
+    except IntegrityError:
+        # Rollback the session in case of error. Nothing from this batch has been persisted, so do not hand back
+        # ids that callers could try to link documents or scenarios to
+        session.rollback()
+        logger.exception("Failed to insert or update energy assessments, the transaction was rolled back.")
+        return {}
+
+    return uprn_to_assessment_id
+
+
+def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[dict]:
+    """
+    Retrieve the latest energy assessment for a given UPRN based on the inspection date.
+
+    :param session: The database session
+    :param uprn: The unique property reference number
+    :return: The latest assessment converted with EnergyAssessment.to_dict(), the result of
+        EnergyAssessment.empty_response() when no assessment exists for the UPRN, or None if the lookup raised
+    """
+    try:
+        # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order
+        latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by(
+            desc(EnergyAssessment.inspection_date)).first()
+
+        return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response()
+    except Exception:
+        # Best-effort lookup: callers receive None on failure, but the failure must be visible in the logs
+        logger.exception(f"An error occurred while fetching the latest energy assessment for uprn {uprn}")
+        return None
+
+
+def create_scenarios_for_documents(session: Session, document_list: List[dict], uprn_to_assessment_id: dict):
+    """
+    Creates scenarios for documents by UPRN and links them to the energy assessments.
+
+    For every document with a truthy 'scenario_id', that value is used as the scenario *name*. The scenario with that
+    name for the document's energy assessment is looked up, created when it does not exist yet, and the document's
+    'scenario_id' is then overwritten in place with the database id of that scenario. Documents without a
+    'scenario_id' are left untouched.
+
+    :param session: The SQLAlchemy session.
+    :param document_list: A list of dictionaries containing document data. The dictionaries are modified in place.
+    :param uprn_to_assessment_id: A dictionary mapping UPRN to energy_assessment_id.
+    :return: None. An IntegrityError rolls the transaction back and is logged rather than raised.
+    """
+    try:
+        for document in document_list:
+            uprn = document.get('uprn')
+            scenario_name = document.get('scenario_id')
+
+            if scenario_name:
+                # Get the associated energy_assessment_id for the UPRN
+                energy_assessment_id = uprn_to_assessment_id.get(uprn)
+
+                # Check if the scenario already exists
+                existing_scenario = session.query(EnergyAssessmentScenarios).filter_by(
+                    scenario_name=scenario_name,
+                    energy_assessment_id=energy_assessment_id
+                ).first()
+
+                if not existing_scenario:
+                    # Create the scenario
+                    new_scenario = EnergyAssessmentScenarios(
+                        scenario_name=scenario_name,
+                        energy_assessment_id=energy_assessment_id
+                    )
+                    session.add(new_scenario)
+                    session.flush()  # Get the new scenario ID
+
+                    # Update document with new scenario ID
+                    document['scenario_id'] = new_scenario.id
+                else:
+                    # If the scenario already exists, just use its ID
+                    document['scenario_id'] = existing_scenario.id
+
+        # Commit the scenarios
+        session.commit()
+        logger.info("Scenarios created successfully.")
+
+    except IntegrityError:
+        session.rollback()
+        # NOTE(review): documents processed before the failure keep the ids of scenarios that were rolled back
+        logger.exception("Failed to create energy assessment scenarios, the transaction was rolled back.")
+
+
+def create_documents(session: Session, document_list: List[dict]):
+    """
+    Inserts documents into the energy_assessment_documents table, linking them to scenarios and assessments.
+
+    All documents are built and validated before any of them is added to the session, so a malformed entry does not
+    leave earlier documents pending in the session.
+
+    :param session: The SQLAlchemy session.
+    :param document_list: A list of dictionaries containing document data. Each needs 'uprn', 'document_type',
+        'document_location' and 'energy_assessment_id'; 'scenario_id' is optional.
+    :raises KeyError: if a document is missing one of the required keys.
+    :raises ValueError: if a 'document_type' is not one of the DocumentTypeEnum values.
+    :return: None. An IntegrityError rolls the transaction back and is logged rather than raised.
+    """
+    new_documents = [
+        EnergyAssessmentDocuments(
+            uprn=document['uprn'],
+            # Validate the type against DocumentTypeEnum, then hand its string value to the model
+            document_type=DocumentTypeEnum(document['document_type']).value,
+            document_location=document['document_location'],
+            energy_assessment_id=document['energy_assessment_id'],
+            scenario_id=document.get('scenario_id')  # Might be None if no scenario
+        )
+        for document in document_list
+    ]
+
+    try:
+        for new_document in new_documents:
+            session.add(new_document)
+
+        # Commit all document insertions
+        session.commit()
+        logger.info("Documents created successfully.")
+
+    except IntegrityError:
+        session.rollback()
+        logger.exception("Failed to create energy assessment documents, the transaction was rolled back.")
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@ -1,10 +1,14 @@
 from sqlalchemy import func
-from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation
-from backend.app.db.models.portfolio import Portfolio
+from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario


 def aggregate_portfolio_recommendations(
-    session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict
+    session,
+    portfolio_id: int,
+    scenario_id: int,
+    total_valuation_increase: float,
+    labour_days: float,
+    aggregated_data: dict
 ):
    # Aggregate multiple fields
    aggregates = (
@ -17,7 +21,11 @@ def aggregate_portfolio_recommendations(
        )
        .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
        .join(Plan, Plan.id == PlanRecommendations.plan_id)
-        .filter(Plan.portfolio_id == portfolio_id, Plan.is_default == True, Recommendation.default == True)
+        .filter(
+            Plan.portfolio_id == portfolio_id,
+            Plan.scenario_id == scenario_id,
+            Recommendation.default == True
+        )
        .one()
    )

@ -30,16 +38,17 @@ def aggregate_portfolio_recommendations(
        **aggregated_data
    }

-    # Get the portfolio and update the fields
-    portfolio = session.query(Portfolio).filter_by(id=portfolio_id).one()
+    # Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
+    portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
+
    # Update the data
    for key, value in aggregates_dict.items():
-        setattr(portfolio, key, value)
+        setattr(portfolio_scenario, key, value)

    # Insert total valuation increase and labour days
-    portfolio.property_valuation_increase = total_valuation_increase
-    portfolio.labour_days = labour_days
+    portfolio_scenario.property_valuation_increase = total_valuation_increase
+    portfolio_scenario.labour_days = labour_days

-    # Merge the updated portfolio back into the session
-    session.merge(portfolio)
+    # Merge the updated portfolio plan back into the session
+    session.merge(portfolio_scenario)
    session.flush()
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@ -1,8 +1,12 @@
 from sqlalchemy import insert, delete
 from sqlalchemy.orm import Session
-from backend.app.db.models.recommendations import Plan, Recommendation, RecommendationMaterials, PlanRecommendations
-from backend.app.db.models.portfolio import PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, \
-    PropertyDetailsEpcModel
+from sqlalchemy.exc import SQLAlchemyError
+from backend.app.db.models.recommendations import (
+    Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
+)
+from backend.app.db.models.portfolio import (
+    PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel
+)


 def create_plan(session: Session, plan):
@ -11,12 +15,38 @@ def create_plan(session: Session, plan):
    :param session: The database session
    :param plan: dictionary of data representing a plan to be created
    """
+    try:
+        new_plan = Plan(**plan)
+        session.add(new_plan)
+        session.flush()
+        session.commit()
+        return new_plan.id
+    except SQLAlchemyError as e:
+        session.rollback()
+        raise e

-    new_plan = Plan(**plan)
-    session.add(new_plan)
-    session.flush()

-    return new_plan.id
+def create_scenario(session: Session, scenario):
+    """
+    This function will create a record for the scenario in the database and return it.
+
+    The first scenario created for a portfolio is flagged as its default scenario, every later scenario for the same
+    portfolio is not. The "is_default" key is written into the supplied dictionary before the record is built.
+    :param session: The database session
+    :param scenario: dictionary of data representing a scenario to be created, must contain "portfolio_id"
+    :return: The newly created Scenario
+    """
+    try:
+        # Any scenario already stored for this portfolio means the new scenario will NOT be the default scenario
+        portfolio_scenario = session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first()
+        scenario["is_default"] = not portfolio_scenario
+
+        created_scenario = Scenario(**scenario)
+        session.add(created_scenario)
+        session.flush()
+        session.commit()
+        return created_scenario
+    except SQLAlchemyError:
+        # Undo the partial work and let the caller deal with the original error
+        session.rollback()
+        raise


 def create_recommendation(session: Session, recommendation):
@ -25,12 +55,15 @@ def create_recommendation(session: Session, recommendation):
    :param session: The database session
    :param recommendation: dictionary of data representing a recommendation to be created
    """
-
-    new_recommendation = Recommendation(**recommendation)
-    session.add(new_recommendation)
-    session.flush()
-
-    return new_recommendation.id
+    try:
+        new_recommendation = Recommendation(**recommendation)
+        session.add(new_recommendation)
+        session.flush()
+        session.commit()
+        return new_recommendation.id
+    except SQLAlchemyError as e:
+        session.rollback()
+        raise e


 def create_recommendation_material(session: Session, recommendation_id, material_id, depth):
@ -68,62 +101,68 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids):
    session.execute(insert(PlanRecommendations).values(data))


-def upload_recommendations(session: Session, recommendations_to_upload, property_id):
-    # Prepare data for bulk insert for Recommendation
-    recommendations_data = [
-        {
-            "property_id": property_id,
-            "type": rec["type"],
-            "description": rec["description"],
-            "estimated_cost": rec["total"],
-            "default": rec["default"],
-            "starting_u_value": rec.get("starting_u_value"),
-            "new_u_value": rec.get("new_u_value"),
-            "sap_points": rec["sap_points"],
-            "energy_savings": rec["heat_demand"],
-            "kwh_savings": rec["kwh_savings"],
-            "co2_equivalent_savings": rec["co2_equivalent_savings"],
-            "total_work_hours": rec["labour_hours"],
-            "energy_cost_savings": rec["energy_cost_savings"],
-            "labour_days": rec["labour_days"],
-            "already_installed": rec["already_installed"],
-        }
-        for rec in recommendations_to_upload
-    ]
+def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id):
+    try:
+        # Prepare data for bulk insert for Recommendation
+        recommendations_data = [
+            {
+                "property_id": property_id,
+                "type": rec["type"],
+                "description": rec["description"],
+                "estimated_cost": rec["total"],
+                "default": rec["default"],
+                "starting_u_value": rec.get("starting_u_value"),
+                "new_u_value": rec.get("new_u_value"),
+                "sap_points": rec["sap_points"],
+                "energy_savings": rec["heat_demand"],
+                "kwh_savings": rec["kwh_savings"],
+                "co2_equivalent_savings": rec["co2_equivalent_savings"],
+                "total_work_hours": rec["labour_hours"],
+                "energy_cost_savings": rec["energy_cost_savings"],
+                "labour_days": rec["labour_days"],
+                "already_installed": rec["already_installed"],
+            }
+            for rec in recommendations_to_upload
+        ]

-    session.bulk_insert_mappings(Recommendation, recommendations_data)
+        # Insert the recommendations, get back the IDs
+        stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data)
+        result = session.execute(stmt)
+        uploaded_recommendation_ids = [row[0] for row in result]

-    # To get the IDs of the newly inserted recommendations, we need to flush the session
-    session.flush()
+        # Prepare data for bulk insert for RecommendationMaterials
+        recommendation_materials_data = [
+            {
+                "recommendation_id": recommendation_id,
+                "material_id": part["id"],
+                "depth": int(part["depth"]) if part["depth"] else None,
+                "quantity": part["quantity"],
+                "quantity_unit": part["quantity_unit"],
+                "estimated_cost": part["total"],
+            }
+            for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
+            for part in rec["parts"]
+        ]

-    # Map the uploaded_recommendation_ids with the original data for reference
-    uploaded_recommendation_ids = [rec.id for rec in session.query(Recommendation).filter(
-        Recommendation.property_id == property_id,
-        Recommendation.description.in_([rec["description"] for rec in recommendations_to_upload])
-    )]
+        session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)

-    # Prepare data for bulk insert for RecommendationMaterials
-    # We can have multiple materials per recommendation. The aggregation of the materials will total the
-    # recommendation figures
-    recommendation_materials_data = [
-        {
-            "recommendation_id": recommendation_id,
-            "material_id": part["id"],
-            "depth": int(part["depth"]) if part["depth"] else None,
-            "quantity": part["quantity"],
-            "quantity_unit": part["quantity_unit"],
-            "estimated_cost": part["total"],
-        }
-        for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
-        for part in rec["parts"]
-    ]
+        # flush the changes to get the newly created IDs
+        session.flush()

-    session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
+        create_plan_recommendations(
+            session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
+        )

-    # flush the changes to get the newly created IDs
-    session.flush()
+        # Commit the transaction
+        session.commit()

-    return uploaded_recommendation_ids
+        return True
+
+    except SQLAlchemyError as e:
+        # Rollback the transaction in case of an error
+        session.rollback()
+        print(f"An error occurred: {e}")
+        return False


 def clear_portfolio(session: Session, portfolio_id: int):
@ -148,6 +187,9 @@ def clear_portfolio(session: Session, portfolio_id: int):
    # Delete all Plans associated with the portfolio
    session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id))

+    # Delete all Scenarios associated with the portfolio
+    session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id))
+
    # Delete all Recommendations associated with the properties
    session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids)))

--- a/backend/app/db/models/energy_assessments.py
+++ b/backend/app/db/models/energy_assessments.py
@ -0,0 +1,207 @@
+import enum
+from datetime import datetime, timezone
+
+from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey
+from sqlalchemy.dialects.postgresql import ENUM as PgEnum
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()
+
+
+class EnergyAssessment(Base):
+    """
+    ORM model for the 'energy_assessments' table.
+
+    Holds the EPC-style fields listed in EPC_KEYS together with additional survey measurements (doors, windows,
+    perimeter, hot water cylinder, ...). Most of the EPC-style values are stored as Text rather than numeric types.
+    """
+    __tablename__ = 'energy_assessments'
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    uprn = Column(BigInteger, nullable=False)
+    uprn_source = Column(Text, nullable=False)
+    property_type = Column(Text, nullable=False)
+    building_reference_number = Column(Text)
+    current_energy_efficiency = Column(Text, nullable=False)
+    current_energy_rating = Column(Text, nullable=False)
+    address1 = Column(Text, nullable=False)
+    address2 = Column(Text, nullable=False)
+    address3 = Column(Text)
+    posttown = Column(Text, nullable=False)
+    postcode = Column(Text, nullable=False)
+    address = Column(Text, nullable=False)
+    county = Column(Text)
+    constituency = Column(Text)
+    constituency_label = Column(Text)
+    low_energy_fixed_light_count = Column(Text, nullable=False)
+    construction_age_band = Column(Text, nullable=False)
+    mainheat_energy_eff = Column(Text, nullable=False)
+    windows_env_eff = Column(Text, nullable=False)
+    lighting_energy_eff = Column(Text, nullable=False)
+    environment_impact_potential = Column(Text, nullable=False)
+    mainheatcont_description = Column(Text, nullable=False)
+    sheating_energy_eff = Column(Text, nullable=False)
+    local_authority = Column(Text, nullable=False)
+    local_authority_label = Column(Text, nullable=False)
+    fixed_lighting_outlets_count = Column(Text, nullable=False)
+    energy_tariff = Column(Text, nullable=False)
+    mechanical_ventilation = Column(Text, nullable=False)
+    solar_water_heating_flag = Column(Text, nullable=False)
+    co2_emissions_potential = Column(Text, nullable=False)
+    number_heated_rooms = Column(Text, nullable=False)
+    floor_description = Column(Text, nullable=False)
+    energy_consumption_potential = Column(Text, nullable=False)
+    built_form = Column(Text, nullable=False)
+    number_open_fireplaces = Column(Text, nullable=False)
+    windows_description = Column(Text, nullable=False)
+    glazed_area = Column(Text, nullable=False)
+    inspection_date = Column(DateTime(timezone=True), nullable=False)
+    mains_gas_flag = Column(Text, nullable=False)
+    co2_emiss_curr_per_floor_area = Column(Text, nullable=False)
+    heat_loss_corridor = Column(Text, nullable=False)
+    unheated_corridor_length = Column(Text)
+    flat_storey_count = Column(Text)
+    roof_energy_eff = Column(Text, nullable=False)
+    total_floor_area = Column(Text, nullable=False)
+    environment_impact_current = Column(Text, nullable=False)
+    roof_description = Column(Text, nullable=False)
+    floor_energy_eff = Column(Text, nullable=False)
+    number_habitable_rooms = Column(Text, nullable=False)
+    hot_water_env_eff = Column(Text, nullable=False)
+    mainheatc_energy_eff = Column(Text, nullable=False)
+    main_fuel = Column(Text, nullable=False)
+    lighting_env_eff = Column(Text, nullable=False)
+    windows_energy_eff = Column(Text, nullable=False)
+    floor_env_eff = Column(Text, nullable=False)
+    sheating_env_eff = Column(Text, nullable=False)
+    lighting_description = Column(Text, nullable=False)
+    roof_env_eff = Column(Text, nullable=False)
+    walls_energy_eff = Column(Text, nullable=False)
+    photo_supply = Column(Text, nullable=False)
+    lighting_cost_potential = Column(Text, nullable=False)
+    mainheat_env_eff = Column(Text, nullable=False)
+    multi_glaze_proportion = Column(Text, nullable=False)
+    main_heating_controls = Column(Text, nullable=False)
+    flat_top_storey = Column(Text)
+    secondheat_description = Column(Text, nullable=False)
+    walls_env_eff = Column(Text, nullable=False)
+    transaction_type = Column(Text, nullable=False)
+    extension_count = Column(Text, nullable=False)
+    mainheatc_env_eff = Column(Text, nullable=False)
+    lmk_key = Column(Text)
+    wind_turbine_count = Column(Text, nullable=False)
+    tenure = Column(Text, nullable=False)
+    floor_level = Column(Text, nullable=False)
+    potential_energy_efficiency = Column(Text, nullable=False)
+    potential_energy_rating = Column(Text, nullable=False)
+    hot_water_energy_eff = Column(Text, nullable=False)
+    low_energy_lighting = Column(Text, nullable=False)
+    walls_description = Column(Text, nullable=False)
+    hotwater_description = Column(Text, nullable=False)
+    co2_emissions_current = Column(Text, nullable=False)
+    heating_cost_current = Column(Text, nullable=False)
+    heating_cost_potential = Column(Text, nullable=False)
+    hot_water_cost_current = Column(Text, nullable=False)
+    hot_water_cost_potential = Column(Text, nullable=False)
+    lighting_cost_current = Column(Text, nullable=False)
+    energy_consumption_current = Column(Text, nullable=False)
+    lodgement_date = Column(Date, nullable=False)
+    lodgement_datetime = Column(DateTime(timezone=False), nullable=False)
+    mainheat_description = Column(Text, nullable=False)
+    floor_height = Column(Float, nullable=False)
+    glazed_type = Column(Text, nullable=False)
+    # None of the columns from here on appear in EPC_KEYS, so to_dict() reports them under "condition"
+    file_location = Column(Text, nullable=False)
+    surveyor_name = Column(Text, nullable=False)
+    surveyor_company = Column(Text, nullable=False)
+    space_heating_kwh = Column(Text, nullable=False)
+    water_heating_kwh = Column(Text, nullable=False)
+    number_of_doors = Column(Integer, nullable=False)
+    number_of_insulated_doors = Column(Integer, nullable=False)
+    number_of_floors = Column(Integer, nullable=False)
+    insulation_wall_area = Column(Float, nullable=False)
+    heat_loss_perimeter = Column(Float, nullable=False)
+    party_wall_length = Column(Float, nullable=False)
+    perimeter = Column(Float, nullable=False)
+    rooms_with_bath_and_or_shower = Column(Integer)
+    rooms_with_mixer_shower_no_bath = Column(Integer)
+    room_with_bath_and_mixer_shower = Column(Integer)
+    percent_draftproofed = Column(Integer)
+    has_hot_water_cylinder = Column(Boolean)
+    cylinder_insulation_type = Column(Text)
+    cylinder_insulation_thickness = Column(Integer)
+    cylinder_thermostat = Column(Boolean)
+    main_dwelling_ground_floor_area = Column(Float)
+    number_of_windows = Column(Integer)
+    windows_area = Column(Float)
+
+    # Attribute names that to_dict() groups under "epc"; every other table column (including id) is grouped under
+    # "condition"
+    EPC_KEYS = [
+        'low_energy_fixed_light_count', 'address', 'uprn_source', 'floor_height', 'heating_cost_potential',
+        'unheated_corridor_length', 'hot_water_cost_potential', 'construction_age_band', 'potential_energy_rating',
+        'mainheat_energy_eff', 'windows_env_eff', 'lighting_energy_eff', 'environment_impact_potential', 'glazed_type',
+        'heating_cost_current', 'address3', 'mainheatcont_description', 'sheating_energy_eff', 'property_type',
+        'local_authority_label', 'fixed_lighting_outlets_count', 'energy_tariff', 'mechanical_ventilation',
+        'hot_water_cost_current', 'county', 'postcode', 'solar_water_heating_flag', 'constituency',
+        'co2_emissions_potential', 'number_heated_rooms', 'floor_description', 'energy_consumption_potential',
+        'local_authority', 'built_form', 'number_open_fireplaces', 'windows_description', 'glazed_area',
+        'inspection_date', 'mains_gas_flag', 'co2_emiss_curr_per_floor_area', 'address1', 'heat_loss_corridor',
+        'flat_storey_count', 'constituency_label', 'roof_energy_eff', 'total_floor_area', 'building_reference_number',
+        'environment_impact_current', 'co2_emissions_current', 'roof_description', 'floor_energy_eff',
+        'number_habitable_rooms', 'address2', 'hot_water_env_eff', 'posttown', 'mainheatc_energy_eff', 'main_fuel',
+        'lighting_env_eff', 'windows_energy_eff', 'floor_env_eff', 'sheating_env_eff', 'lighting_description',
+        'roof_env_eff', 'walls_energy_eff', 'photo_supply', 'lighting_cost_potential', 'mainheat_env_eff',
+        'multi_glaze_proportion', 'main_heating_controls', 'lodgement_datetime', 'flat_top_storey',
+        'current_energy_rating', 'secondheat_description', 'walls_env_eff', 'transaction_type', 'uprn',
+        'current_energy_efficiency', 'energy_consumption_current', 'mainheat_description', 'lighting_cost_current',
+        'lodgement_date', 'extension_count', 'mainheatc_env_eff', 'lmk_key', 'wind_turbine_count', 'tenure',
+        'floor_level', 'potential_energy_efficiency', 'hot_water_energy_eff', 'low_energy_lighting',
+        'walls_description', 'hotwater_description'
+    ]
+
+    def to_dict(self):
+        """
+        Convert the SQLAlchemy object to a dictionary.
+
+        :return: A dictionary of the form {"epc": {...}, "condition": {...}}. "epc" holds the EPC_KEYS attributes
+            with the underscores in the key names replaced by hyphens; "condition" holds every other table column
+            under its unchanged column name.
+        """
+
+        # EPC fields are exposed with hyphenated keys, e.g. total_floor_area becomes total-floor-area
+        epc = {key.replace("_", "-"): getattr(self, key) for key in self.EPC_KEYS}
+        # Get everything else
+        condition = {
+            column.name: getattr(self, column.name)
+            for column in self.__table__.columns if column.name not in self.EPC_KEYS
+        }
+
+        return {"epc": epc, "condition": condition}
+
+    @staticmethod
+    def empty_response():
+        """Return the same top-level structure as to_dict(), with both sections empty."""
+        return {"epc": {}, "condition": {}}
+
+
+class EnergyAssessmentScenarios(Base):
+    """ORM model for the 'energy_assessment_scenarios' table: a named scenario belonging to one energy assessment."""
+    __tablename__ = 'energy_assessment_scenarios'
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    scenario_name = Column(Text, nullable=False)
+    # Every scenario must reference the assessment it belongs to
+    energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
+
+
+class DocumentTypeEnum(enum.Enum):
+    """Kinds of document that can be stored against an energy assessment; the values are human-readable labels."""
+    EPR = "EPR"
+    ConditionReport = "Condition Report"
+    EvidenceReport = "Evidence Report"
+    SummaryInformation = "Summary Information"
+    FloorPlan = "Floor Plan"
+    ScenarioDraftEPC = "Scenario Draft EPC"
+    ScenarioSiteNotes = "Scenario Site Notes"
+
+
+class EnergyAssessmentDocuments(Base):
+    """
+    ORM model for the 'energy_assessment_documents' table: a document stored for an energy assessment and,
+    optionally, for one of that assessment's scenarios.
+    """
+    __tablename__ = 'energy_assessment_documents'
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    uprn = Column(BigInteger, nullable=False)
+    energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
+    # create_type=False: the "document_type" enum type is expected to exist in the database already
+    document_type = Column(PgEnum(DocumentTypeEnum, name="document_type", create_type=False), nullable=False)
+    document_location = Column(Text, nullable=False)
+    # The column is timezone-aware, so the default must be an aware UTC timestamp. A naive datetime.utcnow() value
+    # would be interpreted in the database session's time zone
+    uploaded_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc))
+    scenario_id = Column(BigInteger, ForeignKey('energy_assessment_scenarios.id'), nullable=True)
+
+    @staticmethod
+    def empty_response():
+        """Return a placeholder document dictionary in which every field is None."""
+        return {
+            "id": None,
+            "uprn": None,
+            "document_type": None,
+            "document_location": None,
+            "uploaded_at": None,
+            "scenario_id": None
+        }
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@ -3,6 +3,7 @@ import pytz
 import datetime
 from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
 from sqlalchemy.ext.declarative import declarative_base
+from backend.app.db.models.users import UserModel  # noqa

 Base = declarative_base()

@ -168,7 +169,8 @@ class PropertyDetailsEpcModel(Base):
    energy_tariff = Column(Text)
    primary_energy_consumption = Column(Float)
    co2_emissions = Column(Float)
-    adjusted_energy_consumption = Column(Float)
+    current_energy_demand = Column(Float)
+    current_energy_demand_heating_hotwater = Column(Float)
    estimated = Column(Boolean, default=False)


@ -204,3 +206,13 @@ class PropertyTargetsModel(Base):
    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
    epc = Column(Enum(Epc))
    heat_demand = Column(Text)
+
+
+class PortfolioUsers(Base):
+    """
+    ORM model for the "portfolioUsers" table, which links a user to a portfolio with a given role.
+    """
+    __tablename__ = "portfolioUsers"
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
+    portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
+    role = Column(Text, nullable=False)
+    # The defaults are callables so the timestamp is computed for each INSERT. Passing
+    # datetime.datetime.now(pytz.utc) directly would evaluate it once at import time, and every row created by the
+    # process would then share that single value.
+    created_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))
+    updated_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@ -50,8 +50,10 @@ class Plan(Base):
    __tablename__ = 'plan'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    name = Column(String, nullable=True, default="")
    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
    property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
+    scenario_id = Column(BigInteger, ForeignKey('scenario.id'))  # Doesn't have to be linked to a scenario
    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
    is_default = Column(Boolean, nullable=False)
    valuation_increase_lower_bound = Column(Float)
@ -65,3 +67,46 @@ class PlanRecommendations(Base):
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
    recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
+
+
+class Scenario(Base):
+    # ORM model for the 'scenario' table: one planning run for a portfolio, holding the inputs that triggered it
+    # (file paths, goal, housing type) together with the aggregate results computed for it.
+    __tablename__ = 'scenario'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    name = Column(String, nullable=False)
+    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())  # set by the database on insert
+    budget = Column(Float)
+    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
+    housing_type = Column(String, nullable=False)
+    goal = Column(String, nullable=False)
+    # Locations of the input files for the run; only the trigger file is mandatory
+    trigger_file_path = Column(String, nullable=False)
+    already_installed_file_path = Column(String)
+    patches_file_path = Column(String)
+    non_invasive_recommendations_file_path = Column(String)
+    exclusions = Column(String)
+    multi_plan = Column(Boolean, default=False)
+    is_default = Column(Boolean, default=False, nullable=False)
+
+    # Aggregate fields that were previously stored at the portfolio level and now live on the scenario
+    cost = Column(Float)
+    total_work_hours = Column(Float)
+    energy_savings = Column(Float)
+    co2_equivalent_savings = Column(Float)
+    energy_cost_savings = Column(Float)
+    epc_breakdown_pre_retrofit = Column(String)
+    epc_breakdown_post_retrofit = Column(String)
+    number_of_properties = Column(BigInteger)
+    n_units_to_retrofit = Column(BigInteger)
+    # NOTE(review): the per-unit and ratio metrics below are declared as String rather than Float - confirm the
+    # stored format with whatever writes these rows
+    co2_per_unit_pre_retrofit = Column(String)
+    co2_per_unit_post_retrofit = Column(String)
+    energy_bill_per_unit_pre_retrofit = Column(String)
+    energy_bill_per_unit_post_retrofit = Column(String)
+    energy_consumption_per_unit_pre_retrofit = Column(String)
+    energy_consumption_per_unit_post_retrofit = Column(String)
+    valuation_improvement_per_unit = Column(String)
+    cost_per_unit = Column(String)
+    cost_per_co2_saved = Column(String)
+    cost_per_sap_point = Column(String)
+    valuation_return_on_investment = Column(String)
+    property_valuation_increase = Column(Float)
+    labour_days = Column(Float)
--- a/backend/app/energy_assessments/router.py
+++ b/backend/app/energy_assessments/router.py
@ -0,0 +1,273 @@
+import os
+from io import BytesIO
+from typing import List
+
+from fastapi import APIRouter, Depends
+from starlette.responses import Response
+
+from backend.app.config import get_settings
+from backend.app.dependencies import validate_token
+from backend.app.energy_assessments.schemas import EnergyAssessmentUploadPayload
+
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.exc import IntegrityError, OperationalError
+from backend.app.db.connection import db_engine
+from backend.app.db.functions.energy_assessment_functions import (
+    bulk_insert_energy_assessments, create_scenarios_for_documents, create_documents
+)
+
+from etl.xml_survey_extraction.XmlParser import XmlParser
+
+from utils.s3 import (
+    read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3,
+    list_files_in_s3_folder
+)
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def insert_energy_assessment_documents(document_list: List[dict], uprn_to_assessment_id: dict):
+    """
+    Stamps each document dictionary, in place, with the energy_assessment_id registered for its UPRN.
+
+    Nothing is written to the database here. Documents whose UPRN has no (truthy) id in the mapping are logged and
+    left untouched, i.e. they stay in the list without an 'energy_assessment_id' key.
+
+    :param document_list: A list of dictionaries containing document data; each must have a 'uprn' key.
+    :param uprn_to_assessment_id: A dictionary mapping UPRN to energy_assessment_id.
+    """
+    for doc in document_list:
+        uprn = doc['uprn']
+        assessment_id = uprn_to_assessment_id.get(uprn)
+
+        if assessment_id:
+            # Known UPRN: record the assessment this document belongs to
+            doc['energy_assessment_id'] = assessment_id
+        else:
+            logger.info(f"No energy_assessment_id found for UPRN: {uprn}. Skipping document.")
+
+    logger.info("Energy Assessment IDs assigned to documents.")
+
+
+# Router for the energy assessment endpoints: every route is served under /energy-assessments and depends on
+# validate_token
+router = APIRouter(
+    prefix="/energy-assessments",
+    tags=["energy-assessments"],
+    dependencies=[Depends(validate_token)],
+    responses={404: {"description": "Not found"}}
+)
+
+
+@router.post("/upload")
+async def upload(body: EnergyAssessmentUploadPayload):
+    """
+    Given a location in S3, this service will retrieve the data in S3 and perform the following:
+    1) Extract the data from the uploaded XMLs and store it to the database
+    2) Extract the links to other artefacts collected during the energy assessment, such as EPRs, floor plans and
+       condition reports
+
+    This will allow us to do the following:
+    1) Present the findings of the energy assessment to the client
+    2) Allow the end user to download the artefacts collected during the energy assessment
+
+    Eventually, we will want this service to collect the key documents from the service where they're uploaded
+    (e.g. Onedrive) and store them to S3, but for the moment, this is sufficient
+
+    The assessments are looked up under "{surveyor}/{project_code}/" in the energy assessments bucket, with one
+    sub-folder per property whose name is the UPRN.
+
+    :param body: the upload payload; surveyor and project_code identify the S3 folder to process
+    :return: an empty 200 response on success, a 400 response on a ValueError, and a 500 response on any other error
+    """
+
+    logger.info("Connecting to db")
+    session = sessionmaker(bind=db_engine)()
+
+    try:
+        logger.info("Extracting energy assessment data")
+        energy_assessments = list_files_and_subfolders_in_s3_folder(
+            bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+            folder_name=f"{body.surveyor}/{body.project_code}/"
+        )
+
+        logger.info(
+            f"Found {len(energy_assessments)} energy assessments for {body.surveyor} and {body.project_code}"
+        )
+        # Maps each UPRN to the files found for that property, grouped by document type
+        assessments_map = {}
+        for assessment in energy_assessments:
+            uploaded_xmls = list_xmls_in_s3_folder(
+                bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+                folder_name=os.path.join(assessment, "docs & plans")
+            )
+
+            energy_assessment_files = list_files_in_s3_folder(
+                bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+                folder_name=os.path.join(assessment, "docs & plans")
+            )
+            # Remove xmls from the list of files
+            energy_assessment_files = [file for file in energy_assessment_files if file not in uploaded_xmls]
+            # We now split this into the different types of files. Each type is recognised by a marker in the file
+            # name, compared after removing spaces and lower-casing
+            # EPR
+            eprs = [
+                file for file in energy_assessment_files if "epr.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Condition report
+            condition_reports = [
+                file for file in energy_assessment_files if "cr.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Evidence report
+            evidence_reports = [
+                file for file in energy_assessment_files
+                if "evidence.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Summary report
+            summary_reports = [
+                file for file in energy_assessment_files
+                if "sn.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Floor plans - these are just the jpgs (the extension check is case-sensitive)
+            floor_plans = [file for file in energy_assessment_files if file.endswith(".jpg")]
+
+            # We now retrieve scenarios
+            scenario_folders = list_files_and_subfolders_in_s3_folder(
+                bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+                folder_name=assessment
+            )
+
+            # Keep only the folders whose own name contains the word "scenario"
+            scenario_folders = [
+                folder for folder in scenario_folders if "scenario" in folder.rstrip("/").split("/")[-1].lower()
+            ]
+            scenario_documents = []
+            for sf in scenario_folders:
+                scenario_files = list_files_in_s3_folder(
+                    bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+                    folder_name=sf
+                )
+                notes = [
+                    file for file in scenario_files if "sitenotes" in file.split("/")[-1].replace(" ", "").lower()
+                ]
+                # Everything that is not a site note is treated as a draft EPC
+                draft_epc = [file for file in scenario_files if file not in notes]
+                scenario_documents.append(
+                    {
+                        "identifier": sf.rstrip("/").split("/")[-1],
+                        "Scenario Site Notes": notes,
+                        "Scenario Draft EPC": draft_epc
+                    }
+                )
+
+            # The property folder name is the UPRN; a non-numeric name raises ValueError, handled below as a 400
+            uprn = int(assessment.rstrip("/").split("/")[-1])
+            assessments_map[uprn] = {
+                "xmls": uploaded_xmls,
+                "EPR": eprs,
+                "Condition Report": condition_reports,
+                "Evidence Report": evidence_reports,
+                "Summary Information": summary_reports,
+                "Floor Plan": floor_plans,
+                "scenario_documents": scenario_documents
+            }
+
+        logger.info("Extracted energy assessment data and storing file locations to database")
+        xml_data_to_store = []
+        energy_assessment_documents = []
+        for uprn, files in assessments_map.items():
+            # Create the rows of data to insert into the energy assessment documents
+            property_ea_docs = []
+            for doc_type, doc_files in files.items():
+                if doc_type == "xmls":
+                    continue
+
+                if doc_type == "scenario_documents":
+                    for doc in doc_files:
+                        # The scenario folder name is put in as a placeholder scenario id, as a means of associating
+                        # the scenario documents with the correct scenario
+                        scenario_id = doc["identifier"]
+                        for sn in doc["Scenario Site Notes"]:
+                            property_ea_docs.append(
+                                {
+                                    "uprn": uprn,
+                                    "document_type": "Scenario Site Notes",
+                                    "document_location": sn,
+                                    "scenario_id": scenario_id
+                                }
+                            )
+
+                        for d_epc in doc["Scenario Draft EPC"]:
+                            property_ea_docs.append(
+                                {
+                                    "uprn": uprn,
+                                    "document_type": "Scenario Draft EPC",
+                                    "document_location": d_epc,
+                                    "scenario_id": scenario_id
+                                }
+                            )
+
+                    continue
+
+                for doc in doc_files:
+                    property_ea_docs.append(
+                        {
+                            "uprn": uprn,
+                            "document_type": doc_type,
+                            "document_location": doc,
+                            "scenario_id": None
+                        }
+                    )
+            energy_assessment_documents.extend(property_ea_docs)
+
+            xmls = files["xmls"]
+            # Data from all of the property's XMLs is merged into one dict; on a key clash the later XML wins
+            extracted_data = {}
+            for xml in xmls:
+                xml_data = read_from_s3(bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET, s3_file_name=xml)
+                xml_data_io = BytesIO(xml_data)
+                xml_parser = XmlParser(
+                    file=xml_data_io,
+                    filekey=os.path.join(f"s3://{get_settings().ENERGY_ASSESSMENTS_BUCKET}", xml),
+                    uprn=uprn,
+                    surveyor_company=body.surveyor,
+                )
+                xml_parser.run()
+                if xml_parser.is_lig:
+                    logger.info(f"Extracted data from {xml}")
+                extracted_epc = xml_parser.epc
+                extracted_additional_data = xml_parser.additional_data
+
+                data_to_update = {
+                    **extracted_epc, **extracted_additional_data
+                }
+
+                # We need to update the keys to match the database schema - i.e. we should replace all hyphens with
+                # underscores
+                data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
+
+                extracted_data.update(data_to_update)
+
+            # NOTE(review): a property with no XMLs still appends an empty dict here - confirm the bulk insert copes
+            xml_data_to_store.append(extracted_data)
+
+        logger.info("Storing energy assessment xml data to database")
+        uprn_to_assessment_id = bulk_insert_energy_assessments(session, xml_data_to_store)
+
+        # Insert energy assessment id into the documents data
+        insert_energy_assessment_documents(energy_assessment_documents, uprn_to_assessment_id)
+
+        create_scenarios_for_documents(session, energy_assessment_documents, uprn_to_assessment_id)
+
+        create_documents(session, energy_assessment_documents)
+
+        session.close()  # NOTE(review): redundant - the finally block below closes the session on every path
+
+    except IntegrityError:
+        logger.error("Database integrity error occurred", exc_info=True)
+        session.rollback()
+        return Response(status_code=500, content="Database integrity error.")
+    except OperationalError:
+        logger.error("Database operational error occurred", exc_info=True)
+        session.rollback()
+        return Response(status_code=500, content="Database operational error.")
+    except ValueError:
+        logger.error("Value error - possibly due to malformed data", exc_info=True)
+        session.rollback()
+        return Response(status_code=400, content="Bad request: malformed data.")
+    except Exception as e:  # General exception handling
+        logger.error(f"An error occurred: {e}")  # NOTE(review): unlike the handlers above, no traceback is logged
+        session.rollback()
+        return Response(status_code=500, content="An unexpected error occurred.")
+    finally:
+        session.close()
+
+    return Response(status_code=200)
--- a/backend/app/energy_assessments/schemas.py
+++ b/backend/app/energy_assessments/schemas.py
@ -0,0 +1,10 @@
+from pydantic import BaseModel
+
+
+class EnergyAssessmentUploadPayload(BaseModel):
+    # Request body for the energy assessment upload endpoint.
+    portfolio_id: int
+    # The energy assessment company/individual that conducted the energy assessment; the uploaded data is stored
+    # under this name
+    surveyor: str
+    # A code, like VEC001, which identifies the project; the uploaded data is also stored under this code
+    project_code: str
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -10,6 +10,7 @@ from sqlalchemy.exc import IntegrityError, OperationalError
 from sqlalchemy.orm import sessionmaker
 from starlette.responses import Response

+import backend.app.assumptions as assumptions
 from backend.app.config import get_settings, get_prediction_buckets
 from backend.app.db.connection import db_engine
 from backend.app.db.functions.materials_functions import get_materials
@ -19,8 +20,9 @@ from backend.app.db.functions.property_functions import (
    update_or_create_property_spatial_details
 )
 from backend.app.db.functions.recommendations_functions import (
-    create_plan, create_plan_recommendations, upload_recommendations
+    create_plan, upload_recommendations, create_scenario
 )
+from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
 from backend.app.db.models.portfolio import rating_lookup
 from backend.app.dependencies import validate_token
 from backend.app.plan.schemas import PlanTriggerRequest, MdsRequest
@ -28,9 +30,9 @@ from backend.app.plan.utils import get_cleaned
 from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc

 from backend.ml_models.api import ModelApi
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 from backend.Property import Property
 from backend.apis.GoogleSolarApi import GoogleSolarApi
-from etl.solar.SolarPhotoSupply import SolarPhotoSupply

 from recommendations.optimiser.CostOptimiser import CostOptimiser
 from recommendations.optimiser.GainOptimiser import GainOptimiser
@ -40,7 +42,11 @@ from recommendations.Mds import Mds
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
 from backend.ml_models.Valuation import PropertyValuation
+
 from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
+from etl.bill_savings.KwhData import KwhData
+from etl.spatial.OpenUprnClient import OpenUprnClient
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply

 logger = setup_logger()

@ -127,8 +133,8 @@ def extract_portfolio_aggregation_data(
            [r["energy_cost_savings"] for r in default_recommendations]
        )

-        pre_retrofit_energy_consumption = p.current_adjusted_energy
-        post_retrofit_energy_consumption = p.current_adjusted_energy - sum(
+        pre_retrofit_energy_consumption = p.current_energy_consumption
+        post_retrofit_energy_consumption = p.current_energy_consumption - sum(
            [r["kwh_savings"] for r in default_recommendations]
        )

@ -219,6 +225,142 @@ def extract_portfolio_aggregation_data(
    return aggregation_data


+def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
+    """
+    Assemble the epc_records dictionary (newest EPC, full SAP EPC and older EPCs), factoring in any energy
+    assessment that we have performed for the client.
+    :param epc_searcher: An instance of the SearchEpc class
+    :param energy_assessment: The energy assessment we have performed. If we have not performed an energy assessment,
+                              this should be an empty response as defined by the model's
+                              EnergyAssessment.empty_response() method. When an assessment is present, its "epc"
+                              dictionary is updated in place with the location fields of the newest EPC
+    :return: A tuple of (epc_records, energy_assessment_is_newer), where epc_records has the keys "original_epc",
+             "full_sap_epc" and "old_data"
+    """
+
+    def published_records(extra_old_data=None):
+        # Records led by the newest EPC known to the searcher, optionally with extra entries appended to old_data
+        records = {
+            'original_epc': epc_searcher.newest_epc.copy(),
+            'full_sap_epc': epc_searcher.full_sap_epc.copy(),
+            'old_data': epc_searcher.older_epcs.copy(),
+        }
+        if extra_old_data:
+            records['old_data'] = records['old_data'] + extra_old_data
+        return records
+
+    assessment_epc = energy_assessment["epc"]
+    if not assessment_epc:
+        # We have no energy assessment for this property, so only the searcher's EPCs are used
+        return published_records(), False
+
+    assessed_on = assessment_epc["inspection-date"].strftime("%Y-%m-%d")
+
+    # The location fields are not something we currently pull out of the energy assessment, so we take them from
+    # the newest EPC
+    location_fields = ("county", "constituency", "constituency-label", "local-authority", "local-authority-label")
+    for field in location_fields:
+        assessment_epc[field] = epc_searcher.newest_epc[field]
+
+    newest_inspection = pd.to_datetime(epc_searcher.newest_epc["inspection-date"])
+    if pd.to_datetime(assessed_on) > newest_inspection:
+        # Our energy assessment is newer than every EPC available for this property, so it becomes the original
+        # EPC and the previously newest EPC joins the old data
+        return {
+            "original_epc": assessment_epc,
+            "full_sap_epc": epc_searcher.full_sap_epc.copy(),
+            "old_data": epc_searcher.older_epcs.copy() + [epc_searcher.newest_epc.copy()]
+        }, True
+
+    # Look for our EPC amongst the EPCs already held for the property, matching on inspection date and SAP score
+    lodged_matches = [
+        candidate for candidate in epc_searcher.older_epcs + [epc_searcher.newest_epc]
+        if candidate["inspection-date"] == assessed_on and
+        candidate["current-energy-efficiency"] == assessment_epc["current-energy-efficiency"]
+    ]
+
+    if lodged_matches:
+        # Our EPC is already in the set of EPCs and is not the newest one, so there is nothing to add
+        return published_records(), False
+
+    # Our EPC is older than the newest publicly available one but is not contained in the historicals, so it
+    # can't have been lodged; we include it in the old data
+    return published_records([assessment_epc]), False
+
+
+def get_on_site_data(body: PlanTriggerRequest):
+    """
+    Load the optional on-site data files referenced by the request from the plan trigger S3 bucket.
+
+    A file is only read when its path is set on the request; otherwise an empty list is used in its place.
+    :param body: The request body
+    :return: A tuple of (patches, already_installed, non_invasive_recommendations)
+    """
+
+    def load_optional_csv(filepath):
+        # Unset path -> nothing to read
+        if not filepath:
+            return []
+        return read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=filepath)
+
+    return (
+        load_optional_csv(body.patches_file_path),
+        load_optional_csv(body.already_installed_file_path),
+        load_optional_csv(body.non_invasive_recommendations_file_path),
+    )
+
+
+def extract_property_on_site_recommendations(config, patches, already_installed, non_invasive_recommendations, uprn):
+    """
+    Pick out the on-site data rows that belong to a single property.
+
+    Patches and non-invasive recommendations are matched on UPRN when their first row has a "uprn" key, otherwise on
+    address and postcode; already-installed measures are always matched on address and postcode. The first matching
+    row wins and an empty dict is used when nothing matches.
+
+    If the matched non-invasive row stores its "recommendations" as a string, it is parsed, bare string entries are
+    wrapped as {"type": <entry>}, and the result is written back to the row as a string.
+
+    :param config: The property's row from the plan input, providing "address", "postcode" and (for UPRN-keyed
+                   patches) "uprn"
+    :param patches: Rows of EPC patches
+    :param already_installed: Rows describing measures that are already installed
+    :param non_invasive_recommendations: Rows of non-invasive recommendations
+    :param uprn: The UPRN used to match UPRN-keyed non-invasive recommendations
+    :return: A tuple of (patch, property_already_installed, property_non_invasive_recommendations)
+    """
+
+    def same_address(row):
+        return (row["address"] == config["address"]) and (row["postcode"] == config["postcode"])
+
+    # NOTE(review): patches are matched against config["uprn"] whereas the non-invasive recommendations are
+    # matched against the uprn argument - confirm the two are always the same value
+    if not patches or "uprn" in patches[0]:
+        patch = next((row for row in patches if str(row["uprn"]) == str(config["uprn"])), {})
+    else:
+        patch = next((row for row in patches if same_address(row)), {})
+
+    property_already_installed = next((row for row in already_installed if same_address(row)), {})
+
+    # Some non-invasive recommendation files are keyed on address and postcode rather than UPRN, so we check for
+    # the existence of the uprn key before deciding how to match
+    if not non_invasive_recommendations or "uprn" in non_invasive_recommendations[0]:
+        property_non_invasive_recommendations = next(
+            (row for row in non_invasive_recommendations if str(row["uprn"]) == str(uprn)), {}
+        )
+    else:
+        property_non_invasive_recommendations = next(
+            (row for row in non_invasive_recommendations if same_address(row)), {}
+        )
+
+    raw_recommendations = property_non_invasive_recommendations.get("recommendations")
+    if isinstance(raw_recommendations, str):
+        import ast
+        # Bare measure names such as 'cavity_extract_and_refill' are wrapped into {"type": ...} dictionaries
+        normalised = [
+            {"type": entry, } if isinstance(entry, str) else entry
+            for entry in ast.literal_eval(raw_recommendations)
+        ]
+        property_non_invasive_recommendations["recommendations"] = str(normalised)
+
+    return patch, property_already_installed, property_non_invasive_recommendations
+
+
 router = APIRouter(
    prefix="/plan",
    tags=["plan"],
@ -233,9 +375,6 @@ async def trigger_plan(body: PlanTriggerRequest):
    session = sessionmaker(bind=db_engine)()
    created_at = datetime.now().isoformat()

-    # TODO: We should store the trigger file path in the database with the plan so we can track the file that
-    #       triggered the plan
-
    # TODO: if the measure is already installed, it should actually be the very first phase

    try:
@ -243,21 +382,7 @@ async def trigger_plan(body: PlanTriggerRequest):
        logger.info("Getting the inputs")
        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
        # If we have patches or overrides, we should read them in here
-        patches = []
-        if body.patches_file_path:
-            patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
-
-        already_installed = []
-        if body.already_installed_file_path:
-            already_installed = read_csv_from_s3(
-                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
-            )
-
-        non_invasive_recommendations = []
-        if body.non_invasive_recommendations_file_path:
-            non_invasive_recommendations = read_csv_from_s3(
-                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path
-            )
+        patches, already_installed, non_invasive_recommendations = get_on_site_data(body)

        cleaning_data = read_dataframe_from_s3_parquet(
            bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
@ -281,30 +406,40 @@ async def trigger_plan(body: PlanTriggerRequest):
            epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
            # For the moment, our OS API access is unavailable, so we skip and interpolate
            epc_searcher.find_property(skip_os=True)
+
+            # We check for an energy assessment we have performed on this property:
+            energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn)
+
            # Create a record in db
            property_id, is_new = create_property(
                session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
            )
-            if not is_new:
+            if not is_new and not body.multi_plan:
                continue

-            create_property_targets(
-                session,
-                property_id=property_id,
-                portfolio_id=body.portfolio_id,
-                epc_target=body.goal_value,
-                heat_demand_target=None
+            if is_new:
+                create_property_targets(
+                    session,
+                    property_id=property_id,
+                    portfolio_id=body.portfolio_id,
+                    epc_target=body.goal_value,
+                    heat_demand_target=None
+                )
+
+            # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that.
+            # Otherwise, we use the newest EPC
+            # energy_assessment_is_newer will tell us if the energy assessment is newer than the newest EPC that
+            # has been publically lodged
+            epc_records, energy_assessment["energy_assessment_is_newer"] = create_epc_records(
+                epc_searcher, energy_assessment
            )

-            epc_records = {
-                'original_epc': epc_searcher.newest_epc.copy(),
-                'full_sap_epc': epc_searcher.full_sap_epc.copy(),
-                'old_data': epc_searcher.older_epcs.copy(),
-            }
+            patch, property_already_installed, property_non_invasive_recommendations = (
+                extract_property_on_site_recommendations(
+                    config, patches, already_installed, non_invasive_recommendations, uprn
+                )
+            )

-            patch = next((
-                x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), {})
            epc_records = patch_epc(patch, epc_records)

            prepared_epc = EPCRecord(
@ -313,25 +448,17 @@ async def trigger_plan(body: PlanTriggerRequest):
                cleaning_data=cleaning_data
            )

-            property_already_installed = next((
-                x for x in already_installed if
-                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), {})
-
-            property_non_invasive_recommendations = next((
-                x for x in non_invasive_recommendations if
-                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), {})
-
            input_properties.append(
                Property(
                    id=property_id,
+                    is_new=is_new,
                    address=epc_searcher.address_clean,
                    postcode=epc_searcher.postcode_clean,
                    epc_record=prepared_epc,
                    already_installed=property_already_installed,
                    non_invasive_recommendations=property_non_invasive_recommendations,
-                    **Property.extract_kwargs(config)
+                    energy_assessment=energy_assessment,
+                    **Property.extract_kwargs(config),  # TODO: Depraecate this
                )
            )

@ -345,12 +472,6 @@ async def trigger_plan(body: PlanTriggerRequest):
        materials = get_materials(session)
        cleaned = get_cleaned()

-        uprn_filenames = read_dataframe_from_s3_parquet(
-            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
-        )
-        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
-        solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
-
        dataset_version = "2024-07-08"
        energy_consumption_client = EnergyConsumptionModel(
            model_paths={
@ -363,27 +484,75 @@ async def trigger_plan(body: PlanTriggerRequest):
            environment=get_settings().ENVIRONMENT
        )

-        logger.info("Getting spatial data")
-        for p in input_properties:
-            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client)
-            p.get_spatial_data(uprn_filenames)
+        kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
+
+        model_api = ModelApi(
+            portfolio_id=body.portfolio_id,
+            timestamp=created_at,
+            prediction_buckets=get_prediction_buckets()
+        )
+
+        epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
+
+        kwh_preds = model_api.paginated_predictions(
+            data=epcs_for_scoring,
+            bucket=get_settings().DATA_BUCKET,
+            model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
+            extract_ids=False,
+            batch_size=SCORING_BATCH_SIZE
+        )
+
+        # Insert the spatial data
+        logger.info("Getting spatial data")
+        input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
+
+        [p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties]
+        logger.info("Performing solar analysis")
+
+        # TODO: Tidy this up
+        # TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour
+        # TODO: If we can't get high image quality, should we use the solar API? Maybe just for semi-detached units with
+        #       extensions, since it doesn't seem to do a great job
+        # TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
+        #       basic estimate of roof area

-        # TODO: Handle the case of modelling some units as buildings and some as properties individually
        building_ids = [
            {
                "building_id": p.building_id,
                "longitude": p.spatial["longitude"],
                "latitude": p.spatial["latitude"],
                # Energy consumption is adjusted for the property's expected post retrofit state
+                # We set the target rating to EPC C, which is the typical EPC rating we would expect the
+                # property to achieve post retrofit of just the fabric
                "energy_consumption": energy_consumption_client.estimate_new_consumption(
-                    current_rating=p.data["current-energy-rating"],
-                    target_rating=body.goal_value,
-                    current_consumption=p.current_adjusted_energy
+                    current_energy_efficiency=p.data["current-energy-efficiency"],
+                    target_efficiency="69",
+                    current_consumption=p.estimate_electrical_consumption(
+                        assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
+                    )
                ),
                "property_id": p.id,
                "uprn": p.uprn
            } for p in input_properties if p.building_id is not None
        ]
+        individual_units = [
+            {
+                "longitude": p.spatial["longitude"],
+                "latitude": p.spatial["latitude"],
+                # Energy consumption is adjusted for the property's expected post retrofit state
+                # We set the target rating to EPC C, which is the typical EPC rating we would expect the
+                # property to achieve post retrofit of just the fabric
+                "energy_consumption": energy_consumption_client.estimate_new_consumption(
+                    current_energy_efficiency=p.data["current-energy-efficiency"],
+                    target_efficiency="69",
+                    current_consumption=p.estimate_electrical_consumption(
+                        assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
+                    ),
+                ),
+                "property_id": p.id,
+                "uprn": p.uprn
+            } for p in input_properties if p.building_id is None
+        ]
        if building_ids:
            # Find the unique longitude and latitude pairs for each building id
            unique_coordinates = {}
@ -417,6 +586,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                energy_consumption = sum(
                    [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
                )
+                solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
                solar_api_client.get(
                    longitude=coordinates["longitude"],
                    latitude=coordinates["latitude"],
@ -431,7 +601,8 @@ async def trigger_plan(body: PlanTriggerRequest):
                }

                # Store the data in the database
-                # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists
+                # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
+                #  exists
                solar_api_client.save_to_db(
                    session=session, uprns_to_location=building_uprns[building_id], scenario_type="building"
                )
@ -446,23 +617,67 @@ async def trigger_plan(body: PlanTriggerRequest):
                            energy_consumption
                        )
                        p.set_solar_panel_configuration(unit_solar_panel_configuration)
+        if individual_units:
+            # Model the solar potential at the property level
+            for unit in tqdm(individual_units):

-        else:
-            # # Model the solar potential at the property level
-            # for p in input_properties:
-            #     # TODO: Complete me! - we probably won't do this for individual flats
-            #     solar_performance = solar_api_client.get(
-            #         longitude=p.spatial["longitude"], latitude=p.spatial["latitude"]
-            #     )
-            print("Implement me")
+                # TODO: Tidy up this code
+                # We don't need to do this if we have global inclusions that don't include solar
+                if body.inclusions:
+                    if "solar_pv" not in body.inclusions:
+                        continue

-        logger.info("Getting components and epc recommendations")
+                property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
+                # At this level, we check if the property is suitable for solar and, if not, skip it
+                if not property_instance.is_solar_pv_valid():
+                    continue
+
+                # We check if we have a solar non-invasive recommendation
+                if [r for r in property_instance.non_invasive_recommendations if r["type"] == "solar_pv"]:
+                    continue
+                solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
+                solar_api_client.get(
+                    longitude=unit["longitude"],
+                    latitude=unit["latitude"],
+                    energy_consumption=unit["energy_consumption"],
+                    is_building=False,
+                    session=session,
+                    uprn=unit["uprn"],
+                    property_instance=property_instance
+                )
+
+                # Store the data in the database
+                # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
+                #  exists
+                solar_api_client.save_to_db(
+                    session=session,
+                    uprns_to_location=[
+                        {
+                            "uprn": property_instance.uprn,
+                            "longitude": property_instance.spatial["longitude"],
+                            "latitude": property_instance.spatial["latitude"]
+                        }
+                    ],
+                    scenario_type="unit"
+                )
+
+                property_instance.set_solar_panel_configuration(
+                    solar_panel_configuration={
+                        "insights_data": solar_api_client.insights_data,
+                        "panel_performance": solar_api_client.panel_performance,
+                        "unit_share_of_energy": 1
+                    },
+                    roof_area=solar_api_client.roof_area
+                )
+
+        logger.info("Identifying property recommendations")
        recommendations = {}
        recommendations_scoring_data = []
        representative_recommendations = {}
        for p in tqdm(input_properties):
-
-            recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
+            recommender = Recommendations(
+                property_instance=p, materials=materials, exclusions=body.exclusions, inclusions=body.inclusions
+            )
            property_recommendations, property_representative_recommendations = recommender.recommend()

            if not property_recommendations:
@ -479,7 +694,6 @@ async def trigger_plan(body: PlanTriggerRequest):
            recommendations_scoring_data.extend(p.recommendations_scoring_data)

        # TODO: Make sure that number_habitable_rooms has been dropped
-
        logger.info("Preparing data for scoring in sap change api")
        recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)

@ -488,54 +702,69 @@ async def trigger_plan(body: PlanTriggerRequest):
                     "carbon_ending"]
        )

-        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
+        all_predictions = model_api.paginated_predictions(
+            data=recommendations_scoring_data,
+            bucket=get_settings().DATA_BUCKET,
+            batch_size=SCORING_BATCH_SIZE
+        )

-        all_predictions = model_api.predictions_template()
-        to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
-        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
-            predictions_dict = model_api.predict_all(
-                df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
-                bucket=get_settings().DATA_BUCKET,
-                prediction_buckets=get_prediction_buckets()
+        # Insert the predictions into the recommendations, and get the impact summary
+        scoring_epcs = []  # For scoring the kwh models
+        for property_id in recommendations.keys():
+            property_instance = [p for p in input_properties if p.id == property_id][0]
+
+            recommendations_with_impact, impact_summary = (
+                Recommendations.calculate_recommendation_impact(
+                    property_instance=property_instance,
+                    all_predictions=all_predictions,
+                    recommendations=recommendations,
+                )
            )

-            # Append the predictions to the predictions dictionary
-            for key, scored in predictions_dict.items():
-                all_predictions[key] = pd.concat([all_predictions[key], scored])
+            # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc
+            # at each phase
+            property_instance.update_simulation_epcs(impact_summary)
+            scoring_epcs.extend(property_instance.updated_simulation_epcs)
+            recommendations[property_id] = recommendations_with_impact
+
+        # We call the API with the scoring epcs
+        scoring_epcs = pd.DataFrame(scoring_epcs)
+        scoring_epcs = kwh_client.transform(data=scoring_epcs, cleaned=cleaned)
+
+        kwh_simulation_predictions = model_api.paginated_predictions(
+            data=scoring_epcs,
+            bucket=get_settings().DATA_BUCKET,
+            model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
+            batch_size=SCORING_BATCH_SIZE
+        )
+
+        # We now insert kwh estimates and costs into the recommendations
+        # TODO: We should join the methodology which maps the heating and hot water descriptions to the fuel types in
+        #       Recommendations, but also the Property class
+        logger.info("Calculating tenant savings - kwh and bills")
+        for property_id in tqdm([p.id for p in input_properties]):
+            property_recommendations = recommendations.get(property_id, [])
+            property_instance = [p for p in input_properties if p.id == property_id][0]
+
+            property_current_energy_bill = Recommendations.calculate_recommendation_tenant_savings(
+                property_instance=property_instance,
+                kwh_simulation_predictions=kwh_simulation_predictions,
+                property_recommendations=property_recommendations
+            )
+            property_instance.current_energy_bill = property_current_energy_bill

        # Insert the predictions into the recommendations and run the optimiser
        # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
        #       possibility with heating system
        # TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
        #      cylinder jacket), we should add these to the recommendations as default
-        logger.info("Optimising recommendations")
-        for property_id in recommendations.keys():

-            property_instance = [p for p in input_properties if p.id == property_id][0]
+        for p in input_properties:
+            if not recommendations.get(p.id):
+                continue
+            input_measures = prepare_input_measures(recommendations[p.id], body.goal)

-            (
-                recommendations_with_impact,
-                expected_adjusted_energy,
-                expected_energy_bill
-            ) = (
-                Recommendations.calculate_recommendation_impact(
-                    property_instance=property_instance,
-                    all_predictions=all_predictions,
-                    recommendations=recommendations,
-                    representative_recommendations=representative_recommendations,
-                    energy_consumption_client=energy_consumption_client
-                )
-            )
-
-            # Store the resulting adjusted energy in the property instance
-            property_instance.set_adjusted_energy(
-                expected_adjusted_energy=expected_adjusted_energy,
-                expected_energy_bill=expected_energy_bill
-            )
-
-            input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
-
-            current_sap_points = int(property_instance.data["current-energy-efficiency"])
+            current_sap_points = int(p.data["current-energy-efficiency"])
            target_sap_points = epc_to_sap_lower_bound(body.goal_value)
            sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)

@ -562,7 +791,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
            ]):
                ventilation_rec = next(
-                    (r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
+                    (r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
                    None
                )

@ -576,20 +805,35 @@ async def trigger_plan(body: PlanTriggerRequest):
                    {**rec, "default": True if rec["recommendation_id"] in selected_recommendations else False}
                    for rec in recommendations_by_type
                ]
-                for recommendations_by_type in recommendations_with_impact
+                for recommendations_by_type in recommendations[p.id]
            ]

            # We'll also unlist the recommendations so they're a bit easier to handle from here onwards
            final_recommendations = [
                rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type
            ]
-            recommendations[property_id] = final_recommendations
-
-        # 1) the property data
-        # 2) the property details (epc)
-        # 3) the recommendations
+            recommendations[p.id] = final_recommendations

        logger.info("Uploading recommendations to the database")
+        # If we have any work to do, we create a new scenario
+        engine_scenario = create_scenario(
+            session=session,
+            scenario={
+                "name": body.scenario_name,
+                "created_at": created_at,
+                "budget": body.budget,
+                "portfolio_id": body.portfolio_id,
+                "housing_type": body.housing_type,
+                "goal": body.goal,
+                "trigger_file_path": body.trigger_file_path,
+                "already_installed_file_path": body.already_installed_file_path,
+                "patches_file_path": body.patches_file_path,
+                "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
+                "exclusions": body.exclusions,
+                "multi_plan": body.multi_plan
+            }
+        )
+
        property_valuation_increases = []
        session.commit()
        new_epc_bands = {}
@ -610,18 +854,18 @@ async def trigger_plan(body: PlanTriggerRequest):
                    valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
                    property_value_increase_ranges[p.id] = valuations

-                    # Your existing operations
-                    property_details_epc = p.get_property_details_epc(
-                        portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
-                    )
-                    create_property_details_epc(session, property_details_epc)
+                    if p.is_new:
+                        property_details_epc = p.get_property_details_epc(
+                            portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
+                        )
+                        create_property_details_epc(session, property_details_epc)

-                    update_or_create_property_spatial_details(session, p.uprn, p.spatial)
+                        update_or_create_property_spatial_details(session, p.uprn, p.spatial)

-                    property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
-                    update_property_data(
-                        session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
-                    )
+                        property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
+                        update_property_data(
+                            session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
+                        )

                    if not recommendations_to_upload:
                        continue
@ -629,7 +873,9 @@ async def trigger_plan(body: PlanTriggerRequest):
                    new_plan_id = create_plan(session, {
                        "portfolio_id": body.portfolio_id,
                        "property_id": p.id,
-                        "is_default": True,
+                        "scenario_id": engine_scenario.id,
+                        "is_default": True if p.is_new else False,
+                        "name": body.scenario_name,
                        "valuation_increase_lower_bound": (
                            valuations["lower_bound_increased_value"] - valuations["current_value"]
                        ),
@ -641,10 +887,8 @@ async def trigger_plan(body: PlanTriggerRequest):
                        ),
                    })

-                    uploaded_recommendation_ids = upload_recommendations(session, recommendations_to_upload, p.id)
-
-                    create_plan_recommendations(
-                        session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
+                    upload_recommendations(
+                        session, recommendations_to_upload, p.id, new_plan_id
                    )

                    property_valuation_increases.append(
@ -683,6 +927,7 @@ async def trigger_plan(body: PlanTriggerRequest):
        aggregate_portfolio_recommendations(
            session,
            portfolio_id=body.portfolio_id,
+            scenario_id=engine_scenario.id,
            total_valuation_increase=total_valuation_increase,
            labour_days=labour_days,
            aggregated_data=aggregated_data
@ -817,6 +1062,7 @@ async def build_mds(body: MdsRequest):
                    # already_installed=property_already_installed,
                    # non_invasive_recommendations=property_non_invasive_recommendations,
                    measures=measures,
+                    is_new=is_new,
                    **Property.extract_kwargs(config)
                )
            )
@ -840,7 +1086,7 @@ async def build_mds(body: MdsRequest):
        recommendations = {}

        for p in tqdm(input_properties):
-            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
+            p.set_features(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

            mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
            mds_recommendations, property_representative_recommendations, errors = mds.build()
@ -889,7 +1135,9 @@ async def build_mds(body: MdsRequest):
                     "carbon_ending"]
        )

-        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
+        model_api = ModelApi(
+            portfolio_id=body.portfolio_id, timestamp=created_at, prediction_buckets=get_prediction_buckets()
+        )

        all_predictions = {
            "sap_change_predictions": pd.DataFrame(),
@ -900,12 +1148,6 @@ async def build_mds(body: MdsRequest):
        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
            predictions_dict = model_api.predict_all(
                df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
-                bucket=get_settings().DATA_BUCKET,
-                prediction_buckets={
-                    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
-                    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
-                    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
-                }
            )

            # Append the predictions to the predictions dictionary
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@ -1,6 +1,56 @@
 from pydantic import BaseModel, conlist, validator
 from typing import Optional

+TYPICAL_MEASURE_TYPES = [
+    "wall_insulation",
+    "roof_insulation",
+    "ventilation",
+    "floor_insulation",
+    "windows",
+    "fireplace",
+    "heating",
+    "hot_water",
+    "low_energy_lighting",
+    "secondary_heating",
+    "solar_pv"
+]
+
+SPECIFIC_MEASURES = [
+    # Specific measures. Every entry needs its own trailing comma - adjacent string literals silently concatenate
+    # Walls
+    "internal_wall_insulation",
+    "external_wall_insulation",
+    "cavity_wall_insulation",
+    # Roof
+    "loft_insulation",
+    "flat_roof_insulation",
+    "room_roof_insulation",
+    # Floor
+    "suspended_floor_insulation",
+    "solid_floor_insulation",
+    # Heating
+    "boiler_upgrade",
+    "high_heat_retention_storage_heater",
+    "air_source_heat_pump",
+
+    # Specific measures that will typically come from an energy assessment
+    "trickle_vents",
+    "draught_proofing",
+    "mixed_glazing",  # This covers partial double glazing and secondary glazing
+    "cavity_extract_and_refill",
+]
+
+# This allows us to extend high level categories for measures such as "wall_insulation" to the specific measures
+# such as "external_wall_insulation", "internal_wall_insulation", "cavity_wall_insulation"
+MEASURE_MAP = {
+    "wall_insulation": [
+        "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", "cavity_extract_and_refill"
+    ],
+    "roof_insulation": ["loft_insulation", "flat_roof_insulation", "room_roof_insulation"],
+    "floor_insulation": ["suspended_floor_insulation", "solid_floor_insulation"],
+    "heating": ["boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump"],
+}
+

 class PlanTriggerRequest(BaseModel):
    budget: Optional[float] = None
@ -13,35 +63,30 @@ class PlanTriggerRequest(BaseModel):
    patches_file_path: Optional[str] = None
    non_invasive_recommendations_file_path: Optional[str] = None
    exclusions: Optional[conlist(str, min_items=1)] = None
+    inclusions: Optional[conlist(str, min_items=1)] = None

-    # Pre-defined list of possibilities for exclusions
-    _allowed_exclusions = {
-        # Measure classes
-        "wall_insulation",
-        "ventilation",
-        "roof_insulation",
-        "floor_insulation",
-        "windows",
-        "fireplace",
-        "heating",
-        "hot_water",
-        "lighting",
-        "solar_pv",
-        # Specific measures
-        "air_source_heat_pump",
-    }
+    scenario_name: Optional[str] = ""
+    # If true, we can create multiple plans for the same portfolio; if false, any property that already exists in
+    # the portfolio is ignored
+    multi_plan: Optional[bool] = False

-    _allowed_goals = {"Increase EPC"}
+    _allowed_goals = {"Increasing EPC"}

    _allowed_housing_types = {"Social", "Private"}

    # Validator to ensure exclusions are within the pre-defined possibilities
    @validator('exclusions', each_item=True)
    def check_exclusions(cls, v):
-        if v not in cls._allowed_exclusions:
+        if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES:
            raise ValueError(f"{v} is not an allowed exclusion")
        return v

+    @validator('inclusions', each_item=True)
+    def check_inclusions(cls, v):
+        if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES:
+            raise ValueError(f"{v} is not an allowed inclusion")
+        return v
+
    # Validator to ensure that the goal is within the pre-defined possibilities
    @validator('goal')
    def check_goal(cls, v):
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@ -1,4 +1,6 @@
 import numpy as np
+import pandas as pd
+import backend.app.assumptions as assumptions

 QUARTERLY_ENERGY_PRICES = [
    # 2024 Q1
@ -40,6 +42,53 @@ class AnnualBillSavings:
    DAILY_STANDARD_CHARGE_GAS = 0.3143
    DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601

+    # Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison
+    # For July 2024. These quotes are based on the East Midlands region.
+    FUEL_DATA = pd.DataFrame([
+        {"Fuel": "Electricity Standard", "Price (p)": 28.58, "Unit": "kWh", "Boiler Efficiency (%)": 100,
+         "Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 28.58,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.275},
+        {"Fuel": "Mains Gas Standard", "Price (p)": 6.31, "Unit": "kWh", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 7.01,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.213},
+        {"Fuel": "Kerosene", "Price (p)": 62.49, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 9.79, "Price per kWh (p) (inc boiler efficiency)": 7.09,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.298},
+        {"Fuel": "Gas oil", "Price (p)": 94.50, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 9.96, "Price per kWh (p) (inc boiler efficiency)": 10.54,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.316},
+        {"Fuel": "LPG", "Price (p)": 55.00, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 6.78, "Price per kWh (p) (inc boiler efficiency)": 9.01,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.240},
+        {"Fuel": "Butane", "Price (p)": 216.58, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 6.64, "Price per kWh (p) (inc boiler efficiency)": 36.24,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.248},
+        {"Fuel": "Propane", "Price (p)": 157.67, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 7.22, "Price per kWh (p) (inc boiler efficiency)": 24.25,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.239},
+        {"Fuel": "Kiln Dried (logs)", "Price (p)": 36.52, "Unit": "kg", "Boiler Efficiency (%)": 85,
+         "Energy Content, Net Calorific value (kWh/unit)": 4.09, "Price per kWh (p) (inc boiler efficiency)": 10.51,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.024},
+        {"Fuel": "Pellets (Bagged)", "Price (p)": 39.62, "Unit": "kg", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 4.80, "Price per kWh (p) (inc boiler efficiency)": 9.17,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.049},
+        {"Fuel": "Pellets (Blown bulk)", "Price (p)": 33.92, "Unit": "kg", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 4.80, "Price per kWh (p) (inc boiler efficiency)": 7.85,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.049},
+        {"Fuel": "Smokeless fuel", "Price (p)": 67.26, "Unit": "kg", "Boiler Efficiency (%)": 75,
+         "Energy Content, Net Calorific value (kWh/unit)": 6.70, "Price per kWh (p) (inc boiler efficiency)": 13.38,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.404},
+        {"Fuel": "Coal", "Price (p)": 48.50, "Unit": "kg", "Boiler Efficiency (%)": 75,
+         "Energy Content, Net Calorific value (kWh/unit)": 7.95, "Price per kWh (p) (inc boiler efficiency)": 8.13,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.404},
+        {"Fuel": "GSHP", "Price (p)": 28.58, "Unit": "kWh", "Boiler Efficiency (%)": 350,
+         "Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 8.17,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.079},
+        {"Fuel": "ASHP", "Price (p)": 28.58, "Unit": "kWh", "Boiler Efficiency (%)": 294,
+         "Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 9.72,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.094}
+    ])
+
    EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]

    @classmethod
@ -199,3 +248,75 @@ class AnnualBillSavings:
            return current_epc_rating

        return cls.EPC_BANDS[expected_index - 1]
+
+    @staticmethod
+    def cost_per_kwh(price_per_unit, energy_content_per_unit):
+        """
+        Calculate the cost of a fuel in pounds per kWh.
+
+        :param price_per_unit: Price of one unit of the fuel in pence (callers in this class pass the "Price (p)"
+            column of FUEL_DATA)
+        :param energy_content_per_unit: Energy content of one unit of the fuel, in kWh
+        :return: Cost of the fuel per kWh, in pounds
+        """
+        pence_per_kwh = price_per_unit / energy_content_per_unit
+        # The price is quoted in pence, so divide by 100 to express the result in pounds
+        return pence_per_kwh / 100
+
+    @classmethod
+    def _fuel_table_cost_per_kwh(cls, fuel_name):
+        """
+        Look up a fuel in FUEL_DATA and return its cost in pounds per kWh.
+
+        :param fuel_name: Value of the "Fuel" column identifying the row used for pricing
+        :return: Cost per kWh as calculated by cost_per_kwh from the row's price and energy content
+        """
+        price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == fuel_name].squeeze()
+        return cls.cost_per_kwh(
+            price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
+        )
+
+    @classmethod
+    def calculate_recommendation_fuel_cost(cls, kwh, fuel, cop):
+        """
+        Estimate the fuel cost of meeting an energy demand with a given fuel.
+
+        The demand is divided by cop to get the fuel consumed, which is then priced either at the electricity/gas
+        price cap or at the per-kWh price derived from FUEL_DATA.
+
+        :param kwh: Energy demand to be met, in kWh
+        :param fuel: Name of the fuel of the recommended system
+        :param cop: Divisor applied to kwh - presumably the efficiency / coefficient of performance of the system
+        :return: Cost of the fuel (presumably in pounds, assuming the price caps are held in pounds per kWh)
+        :raises ValueError: If the fuel is not recognised
+        """
+        # Fuels that are priced at the price cap rates
+        price_cap_rates = {
+            "Electricity": cls.ELECTRICITY_PRICE_CAP,
+            "Electricity + Solar Thermal": cls.ELECTRICITY_PRICE_CAP,
+            "Natural Gas": cls.GAS_PRICE_CAP,
+            "Natural Gas (Community Scheme)": cls.GAS_PRICE_CAP,
+            "Natural Gas + Solar Thermal": cls.GAS_PRICE_CAP,
+        }
+        # Fuels that are priced from FUEL_DATA, mapped to the row of the table that is used
+        # NOTE(review): wood logs are priced as bagged pellets and anthracite as smokeless fuel - confirm that these
+        # proxies are intended, given that FUEL_DATA also has a "Kiln Dried (logs)" row
+        fuel_table_rows = {
+            "LPG": "LPG",
+            "LPG + Solar Thermal": "LPG",
+            "Wood Logs": "Pellets (Bagged)",
+            "Wood Pellets": "Pellets (Bagged)",
+            "Oil": "Kerosene",
+            "Smokeless Fuel": "Smokeless fuel",
+            "Anthracite": "Smokeless fuel",
+            "Coal": "Coal",
+        }
+        solar_thermal_fuels = {
+            "Natural Gas + Solar Thermal", "Electricity + Solar Thermal", "LPG + Solar Thermal"
+        }
+
+        if fuel in price_cap_rates:
+            unit_cost = price_cap_rates[fuel]
+        elif fuel in fuel_table_rows:
+            unit_cost = cls._fuel_table_cost_per_kwh(fuel_table_rows[fuel])
+        else:
+            raise ValueError(f"Fuel not recognised: {fuel!r}")
+
+        fuel_kwh = kwh / cop
+        if fuel in solar_thermal_fuels:
+            # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
+            # NOTE(review): confirm SOLAR_CONSUMPTION_PROPORTION is the share still met by the fuel, rather than
+            # the share that is covered by the solar thermal
+            fuel_kwh = fuel_kwh * assumptions.SOLAR_CONSUMPTION_PROPORTION
+
+        return fuel_kwh * unit_cost
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@ -100,6 +100,9 @@ class PropertyValuation:
        200140647: 481_000,
        200140648: 373_000,
        200140649: 373_000,
+        # Vander Elliot Intrusive surveys
+        12103116: 1_537_000,
+        12103117: 1_404_000,
    }

    # We base our valuation uplifts on a number of sources
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@ -1,4 +1,5 @@
 import pandas as pd
+from tqdm import tqdm
 import requests
 from requests.exceptions import RequestException
 from utils.logger import setup_logger
@ -12,24 +13,27 @@ class ModelApi:
        "sap_change_predictions",
        "heat_demand_predictions",
        "carbon_change_predictions",
-        "lighting_cost_predictions",
-        "heating_cost_predictions",
-        "hot_water_cost_predictions",
+        # "lighting_cost_predictions",
+        # "heating_cost_predictions",
+        # "hot_water_cost_predictions",
    ]

    MODEL_URLS = {
        "sap_change_predictions": "sapmodel",
        "heat_demand_predictions": "heatmodel",
        "carbon_change_predictions": "carbonmodel",
-        "lighting_cost_predictions": "lightingmodel",
-        "heating_cost_predictions": "heatingmodel",
-        "hot_water_cost_predictions": "hotwatermodel",
+        "hotwater_kwh_predictions": "hotwaterkwhmodel",
+        "heating_kwh_predictions": "heatingkwhmodel",
+        # "lighting_cost_predictions": "lightingmodel",
+        # "heating_cost_predictions": "heatingmodel",
+        # "hot_water_cost_predictions": "hotwatermodel",
    }

    def __init__(
        self,
        portfolio_id,
        timestamp,
+        prediction_buckets,
        base_url="https://api.dev.hestia.homes",
    ):
        """
@ -44,6 +48,7 @@ class ModelApi:
        self.base_url = base_url
        self.portfolio_id = portfolio_id
        self.timestamp = timestamp
+        self.prediction_buckets = prediction_buckets

    @staticmethod
    def predictions_template():
@ -51,9 +56,8 @@ class ModelApi:
            "sap_change_predictions": pd.DataFrame(),
            "heat_demand_predictions": pd.DataFrame(),
            "carbon_change_predictions": pd.DataFrame(),
-            "lighting_cost_predictions": pd.DataFrame(),
-            "heating_cost_predictions": pd.DataFrame(),
-            "hot_water_cost_predictions": pd.DataFrame(),
+            "hotwater_kwh_predictions": pd.DataFrame(),
+            "heating_kwh_predictions": pd.DataFrame(),
        }

    def upload_scoring_data(self, df: pd.DataFrame, bucket: str, model_prefix: str) -> str:
@ -68,8 +72,8 @@ class ModelApi:
        :return:
        """

-        if model_prefix not in self.MODEL_PREFIXES:
-            raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")
+        # if model_prefix not in self.MODEL_PREFIXES:
+        #     raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")

        # Store parquet file in s3 for scoring
        file_location = f"{model_prefix}/{self.portfolio_id}/{self.timestamp}.parquet"
@ -123,7 +127,7 @@ class ModelApi:
        else:
            return None

-    def predict_all(self, df, bucket, prediction_buckets) -> dict:
+    def predict_all(self, df, bucket, model_prefixes=None, extract_ids=True) -> dict:

        """
        For each model prefix, this method will upload the scoring data to s3 and then make a request to the
@ -132,19 +136,24 @@ class ModelApi:
         a dictionary of pandas dataframes
        :param df:  Pandas dataframe with scoring data to be uploaded to s3
        :param bucket: Name of the bucket in s3 to upload to
-        :param prediction_buckets: Dictionary containing the prediction buckets for each model prefix
+        :param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be
+        used
+        :param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the
+        id column
        :return:
        """

+        model_prefixes = self.MODEL_PREFIXES if model_prefixes is None else model_prefixes
+
        predictions = {}
-        for model_prefix in self.MODEL_PREFIXES:
+        for model_prefix in model_prefixes:
            logger.info(f"Scoring for model prefix: {model_prefix}")
            file_location = self.upload_scoring_data(df, bucket, model_prefix)
            response = self.predict(
                "s3://{DATA_BUCKET}/".format(DATA_BUCKET=bucket) + file_location, model_prefix
            )

-            predictions_bucket = prediction_buckets[model_prefix]
+            predictions_bucket = self.prediction_buckets[model_prefix]

            # Retrieve the predictions
            predictions_df = pd.DataFrame(
@ -155,16 +164,35 @@ class ModelApi:
            )

            predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
-            predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
-            # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
-            # string split on phase= and then grab the second element of the resulting list. We could also use a
-            # regular expression to do this but we use the string split method here, for safety.
-            # We may not always have a phase to split on, so we need to handle this case. We can do this by using the
-            # str[1] method to grab the second element of the resulting list. We then grab the first character of this
-            # string to get the phase. We then convert this to an integer.
-            # Convert back to int
-            predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
+            if extract_ids:
+                predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
+                # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
+                # string split on phase= and then grab the second element of the resulting list. We could also use a
+                # regular expression to do this but we use the string split method here, for safety.
+                # We may not always have a phase to split on, so we need to handle this case. We can do this by using
+                # the str[1] method to grab the second element of the resulting list. We then grab the first
+                # character of this
+                # string to get the phase. We then convert this to an integer.
+                # Convert back to int
+                predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)

            predictions[model_prefix] = predictions_df

        return predictions
+
+    def paginated_predictions(self, data, bucket, batch_size, model_prefixes=None, extract_ids=True):
+        """
+        Generate predictions for a dataframe in batches and combine the results of every batch.
+
+        :param data: Pandas dataframe with the scoring data
+        :param bucket: Name of the bucket in s3 that the scoring data is uploaded to
+        :param batch_size: Maximum number of rows of data that are scored in a single call to predict_all
+        :param model_prefixes: List of model prefixes to generate predictions for, passed through to predict_all
+        :param extract_ids: Passed through to predict_all
+        :return: Dictionary of pandas dataframes keyed on model prefix. Prefixes from the predictions template that
+        received no predictions map to an empty dataframe
+        """
+        # Seed every entry with the empty template frame, so that models without predictions are still returned
+        collected = {key: [frame] for key, frame in self.predictions_template().items()}
+
+        batch_starts = range(0, data.shape[0], batch_size)
+        for start in tqdm(batch_starts, total=len(batch_starts)):
+            batch_predictions = self.predict_all(
+                df=data.iloc[start:start + batch_size],
+                bucket=bucket,
+                model_prefixes=model_prefixes,
+                extract_ids=extract_ids
+            )
+
+            for key, scored in batch_predictions.items():
+                # A model prefix that is missing from the template gets its own entry rather than a KeyError
+                collected.setdefault(key, []).append(scored)
+
+        # Concatenate once per model, instead of copying the accumulated predictions again on every batch
+        return {key: pd.concat(frames) for key, frames in collected.items()}
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@ -6,6 +6,7 @@ from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percenta
 from sklearn.feature_selection import RFECV
 from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_csv_from_s3
 from utils.logger import setup_logger
+from backend.Property import Property

 logger = setup_logger()

@ -102,6 +103,7 @@ class EnergyConsumptionModel:
            # We also retrieve the newest retail price comparison data which comes from Ofgem:
            # https://www.ofgem.gov.uk/energy-data-and-research/data-portal/retail-market-indicators
            # We use the detail price comparison by company and tariff type data
+            print("Reading retail price comparison - make sure this is up-to-date")
            self.read_retail_price_comparison()

    def read_retail_price_comparison(self):
@ -124,37 +126,6 @@ class EnergyConsumptionModel:
        self.retail_price_comparison = pd.DataFrame(data_rows, columns=header)
        self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')

-    def convert_cost_to_today(self, original_cost, lodgement_date):
-        """
-        Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
-        (or as close to today as possible)
-        :param original_cost: The original energy cost
-        :param lodgement_date: The date the EPC was lodged
-        :return:
-        """
-        closest_date = self.retail_price_comparison.iloc[
-            (self.retail_price_comparison['Date'] - lodgement_date).abs().argsort()[:1]
-        ]['Date'].values[0]
-        closest_date = pd.Timestamp(closest_date)
-
-        # Extract the tariff price on the closest date
-        tariff_2024 = self.retail_price_comparison[
-            self.retail_price_comparison['Date'] == closest_date
-            ]['Average standard variable tariff (Large legacy suppliers)'].values[0]
-
-        # Extract the latest available tariff price
-        latest_tariff = self.retail_price_comparison[
-            'Average standard variable tariff (Large legacy suppliers)'
-        ].iloc[-1]
-
-        # Calculate the ratio
-        ratio = float(latest_tariff) / float(tariff_2024)
-
-        # Calculate the updated heating cost
-        updated_cost = original_cost * ratio
-
-        return updated_cost
-
    def read_dataset(self, file_path):
        """Reads the dataset from the specified file path."""
        logger.info(f"Reading dataset from {file_path}")
@ -506,31 +477,36 @@ class EnergyConsumptionModel:
        return prediction

    @staticmethod
-    def calculate_percentage_decrease(start_rating, end_rating, consumption_averages):
+    def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):

        start_consumption = consumption_averages.loc[
-            consumption_averages["current-energy-rating"] == start_rating, "total_consumption"
+            consumption_averages["current-energy-efficiency"].astype(str) == str(start_efficiency), "total_consumption"
        ].values[0]
+
        end_consumption = consumption_averages.loc[
-            consumption_averages["current-energy-rating"] == end_rating, "total_consumption"
+            consumption_averages["current-energy-efficiency"].astype(str) == str(end_efficiency), "total_consumption"
        ].values[0]

        percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100
+        # percentage_decrease cannot be negative
+        if percentage_decrease < 0:
+            percentage_decrease = 0
        return percentage_decrease

-    def estimate_new_consumption(self, current_rating, target_rating, current_consumption):
+    def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption):
        """
-        Given then consumption_averages dataset, which is produced as a result of the data_combining.py script,
+        Given the consumption_averages dataset, which is produced as a result of the training_data.py script,
        for the energy kwh models, this function will estimate the new consumption based on the current consumption,
        based on the expected reduction in consumption from the current rating to the target rating.
-        :param current_rating:
-        :param target_rating:
+        :param current_energy_efficiency:
+        :param target_efficiency:
        :param current_consumption:
-        :param df:
        :return:
        """
        percentage_decrease = self.calculate_percentage_decrease(
-            current_rating, target_rating, self.consumption_averages
+            start_efficiency=current_energy_efficiency,
+            end_efficiency=target_efficiency,
+            consumption_averages=self.consumption_averages
        )
        new_consumption = current_consumption * (1 - percentage_decrease / 100)
        return new_consumption
--- a/etl/bill_savings/KwhData.py
+++ b/etl/bill_savings/KwhData.py
@ -0,0 +1,363 @@
+import re
+import pandas as pd
+import numpy as np
+from datetime import datetime
+from tqdm import tqdm
+from utils.logger import setup_logger
+from utils.s3 import (
+    list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet,
+    read_csv_from_s3
+)
+from backend.Property import Property
+
+logger = setup_logger()
+
+
+class KwhData:
+    COLS_TO_STRINGIFY = ["main-heating-controls", "floor-level"]
+
+    CATEGORICAL_COLUMNS = [
+        "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
+        "number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type",
+        "built-form",
+        "construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
+        "walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
+        "county",
+        "windows-description", "windows-energy-eff", "flat-top-storey",
+        "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
+        "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating",
+        "floor-level"
+    ]
+
+    NUMERICAL_COLUMNS = [
+        'heating-cost-current', 'total-floor-area', 'co2-emissions-current', 'energy-consumption-current',
+        'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
+    ]
+
+    def __init__(self, bucket=None, read_consumption_data=False):
+        """
+        :param bucket: Name of the s3 bucket used when reading and saving data. Can be left as None when nothing
+            needs to be read or saved
+        :param read_consumption_data: When True, the consumption averages and the retail price comparison data are
+            read from s3 when the instance is created
+        """
+        self.bucket = bucket
+        self.run_date = datetime.now().strftime("%Y-%m-%d")
+
+        # The combined dataset, populated by combine
+        self.data = None
+
+        # Locations in s3 of the datasets that have been saved, populated as each dataset is stored
+        self.consumption_data_filepath = None
+        self.consumption_averages_filepath = None
+        self.model_training_data_filepath = None
+
+        # Reference data, only loaded on request
+        self.consumption_averages = None
+        self.retail_price_comparison = None
+        if not read_consumption_data:
+            return
+
+        self.get_consumption_data()
+        self.read_retail_price_comparison()
+
+    def get_consumption_data(self):
+        """
+        Read the newest consumption averages dataset from s3 and store it in self.consumption_averages.
+
+        The run date of each consumption averages file is parsed from the second component of its s3 key, and the
+        file with the latest run date is the one that is read
+        :raises ValueError: If no consumption averages file is found in the energy_consumption/ folder
+        """
+        s3_contents = list_files_in_s3_folder(bucket_name=self.bucket, folder_name="energy_consumption/")
+
+        candidates = []
+        for filepath in s3_contents:
+            if "consumption_averages.parquet" not in filepath:
+                continue
+            candidates.append({"run_date": pd.to_datetime(filepath.split("/")[1]), "filepath": filepath})
+
+        # Order by run date, oldest first, so that the newest file is the last entry
+        candidates = sorted(candidates, key=lambda candidate: candidate["run_date"])
+        if len(candidates) == 0:
+            raise ValueError("No consumption averages data found, something went wrong")
+
+        newest = candidates[-1]
+        self.consumption_averages = read_dataframe_from_s3_parquet(
+            bucket_name=self.bucket,
+            file_key=newest["filepath"]
+        )
+
+    def read_retail_price_comparison(self):
+        """
+        Read the retail price comparison csv from s3 and store it as a dataframe in self.retail_price_comparison.
+        The Date column is parsed to datetimes, with any value that cannot be parsed becoming NaT
+        """
+        column_names = [
+            'Date', 'Average standard variable tariff (Large legacy suppliers)',
+            'Average standard variable tariff (Other suppliers)', 'Average fixed tariff',
+            'Cheapest tariff (Large legacy suppliers)', 'Cheapest tariff (All suppliers)',
+            'Cheapest tariff (Basket)', 'Default tariff cap level'
+        ]
+
+        raw_rows = read_csv_from_s3(
+            bucket_name=self.bucket,
+            filepath="energy_consumption/retail-price-comparison.csv"
+        )
+
+        # The first row is skipped. In every other row the date sits under the key made of a byte order mark and a
+        # double quote, and the remaining values are held as a list under the None key
+        records = [[row['\ufeff"']] + row[None] for row in raw_rows[1:]]
+
+        self.retail_price_comparison = pd.DataFrame(records, columns=column_names)
+        self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')
+
+    @staticmethod
+    def extract_kwh_value(text: str):
+        """
+        Extract the numerical kWh value from a given string.
+
+        :param text: The input string containing the kWh value, e.g. "12,345 kWh per year".
+        :return: The extracted numerical kWh value as an integer, or None if the string contains no such value.
+        """
+        # Find a number, which may contain thousands separators, followed by "kWh per year". The number has to start
+        # with a digit, so that a stray comma on its own is never treated as a value (it cannot be converted to int)
+        match = re.search(r'(\d[\d,]*) kWh per year', text)
+        if match is None:
+            return None
+
+        # Remove commas from the extracted value and convert to integer
+        return int(match.group(1).replace(',', ''))
+
+    def combine(self):
+        """
+        Given the data that is collected containing the kwh values for heating and hot water, this method will combine
+        and save the data.
+
+        Every file in the energy_consumption_data folder of the retrofit-datalake-dev bucket is read. Records whose
+        extracted SAP scores do not match their EPC are dropped, and the remaining records are deduplicated to one row
+        per uprn, keeping the newest run. The combined dataset, and the average total consumption for each
+        current-energy-efficiency score, are saved to self.bucket. The combined dataset is also stored in self.data
+        :return:
+        """
+
+        # The collected data is always read from the datalake bucket, regardless of self.bucket
+        source_bucket = "retrofit-datalake-dev"
+
+        # Firstly, list all of the saved files in s3
+        data_files = list_files_in_s3_folder(bucket_name=source_bucket, folder_name="energy_consumption_data")
+
+        complete_data = []
+        for file_key in tqdm(data_files):
+            # Extract the date from the file name
+            dataset_run_date = pd.Timestamp(file_key.split("/")[-1].split(".")[0])
+
+            # Load the data from the file
+            # NOTE(review): unpickling is only safe for trusted data - confirm that nothing other than our own
+            # collection jobs can write to this location
+            data = read_pickle_from_s3(bucket_name=source_bucket, s3_file_name=file_key)
+
+            # We check that the retrieved energy consumption sufficiently matches the EPC data
+            for x in data:
+                epc_data = x["epc"]
+                epc_sap = epc_data["current-energy-efficiency"]
+                epc_potential_sap = epc_data["potential-energy-efficiency"]
+                # Make sure this matches the extracted sap
+                if int(epc_sap) != int(x["current_epc_efficiency"]) or int(epc_potential_sap) != int(
+                    x["potential_epc_efficiency"]
+                ):
+                    continue
+
+                complete_data.append(
+                    {
+                        **epc_data,
+                        "heating_kwh": self.extract_kwh_value(x["heating_text"]),
+                        "hot_water_kwh": self.extract_kwh_value(x["hot_water_text"]),
+                        "dataset_run_date": dataset_run_date
+                    }
+                )
+
+        df = pd.DataFrame(complete_data)
+        # Because we collate multiple runs into a single data source, it's possible that we have duplicated data at
+        # the uprn level, so we dedupe based on the newest dataset_run_date
+
+        df = df.sort_values("dataset_run_date", ascending=False).drop_duplicates(subset="uprn", keep="first")
+        df = df.drop(columns=["dataset_run_date"])
+
+        for col in self.COLS_TO_STRINGIFY:
+            df[col] = df[col].astype(str)
+
+        # Save the data back to s3, but this time as a parquet file
+        self.consumption_data_filepath = f"energy_consumption/{self.run_date}/energy_consumption_dataset.parquet"
+        logger.info(f"Storing energy consumption dataset in s3 at {self.consumption_data_filepath}")
+        save_dataframe_to_s3_parquet(
+            bucket_name=self.bucket,
+            file_key=self.consumption_data_filepath,
+            df=df
+        )
+
+        # We also estimate the energy consumption reduction from this data, by energy efficiency score
+        df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
+        consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
+        df = df.drop(columns=["total_consumption"])
+
+        self.consumption_averages_filepath = f"energy_consumption/{self.run_date}/consumption_averages.parquet"
+        logger.info(f"Storing consumption averages in s3 at {self.consumption_averages_filepath}")
+        # Save the consumption averages back to s3. These go to self.bucket, alongside the dataset above, because
+        # that is the bucket that get_consumption_data reads them back from
+        save_dataframe_to_s3_parquet(
+            bucket_name=self.bucket,
+            file_key=self.consumption_averages_filepath,
+            df=consumption_averages
+        )
+
+        self.data = df
+
+    def transform(
+        self, data: pd.DataFrame, cleaned, new=False, save=False
+    ):
+        """
+        Given the input EPCs, this method will transform the data into a format that can be used by the model
+        This method can be used to transform the training data, or new epcs within the backend engine
+
+        :param data: Dataframe of EPCs. Note that the lodgement date columns are written to this dataframe in place
+        :param cleaned: For each of walls-description, roof-description and floor-description, the cleaned
+            descriptions as records holding an original_description and a thermal_transmittance
+        :param new: Currently unused, see the TODO below
+        :param save: When True, the transformed data is saved to s3 and nothing is returned
+        :return: The transformed dataframe, or None when save is True
+        :raises Exception: If save is True but no bucket has been set
+        """
+        if save and self.bucket is None:
+            raise Exception("bucket not set, cannot save data")
+
+        # TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
+        #       in anticipation of the new model
+
+        data["lodgement-date"] = pd.to_datetime(data["lodgement-date"])
+        data["lodgement-year"] = data["lodgement-date"].dt.year
+        data["lodgement-month"] = data["lodgement-date"].dt.month
+
+        # For walls, roof, floor description where we have average thermal transmittance, to avoid too many
+        # categories we group them
+        ranges = {
+            "lessthan 0.1": (0, 0.1),
+            "0.1 - 0.3": (0.1, 0.3),
+            "0.3 - 0.5": (0.3, 0.5),
+            "morethan 0.5": (0.5, 2.5),
+        }
+
+        # Generate the lookup table, which maps every value from 0.01 to 2.50 onto the label of its range
+        thermal_transmittance_lookup_table = []
+        for i in range(1, 251):
+            value = i / 100
+            for label, (low, high) in ranges.items():
+                if low < value <= high:
+                    thermal_transmittance_lookup_table.append({"from": value, "to": label})
+                    break
+
+        # Convert to DataFrame so that it can be merged, with the values as strings to avoid joining on floats
+        thermal_transmittance_lookup_table = pd.DataFrame(thermal_transmittance_lookup_table)
+        thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
+
+        # Apply the lookup table to the data
+        for feature in ["walls-description", "roof-description", "floor-description"]:
+            cleaned_df = pd.DataFrame(cleaned[feature])[["original_description", "thermal_transmittance"]]
+            # Round to 2 decimal places and convert to string
+            cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
+
+            data = data.merge(
+                cleaned_df,
+                how="left",
+                left_on=feature,
+                right_on="original_description",
+            )
+            # We now have the thermal transmittance in the data, which we can use to group with the lookup table
+            data = data.merge(
+                thermal_transmittance_lookup_table,
+                how="left",
+                left_on="thermal_transmittance",
+                right_on="from",
+            )
+            # Where "to" is populated, replace feature with to
+            data[feature] = np.where(
+                ~pd.isnull(data["to"]),
+                data["to"],
+                data[feature]
+            )
+            # Remove the helper columns that were brought in by the two merges
+            data = data.drop(columns=["original_description", "thermal_transmittance", "from", "to"])
+
+        data[self.NUMERICAL_COLUMNS] = data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric)
+        data[self.CATEGORICAL_COLUMNS] = data[self.CATEGORICAL_COLUMNS].astype(str)
+
+        # Create new features:
+        data['estimate_annual_kwh'] = data['energy-consumption-current'] * data['total-floor-area']
+
+        if save:
+            self.model_training_data_filepath = f"energy_consumption/{self.run_date}/training_data.parquet"
+            # Log the location that the training data is written to
+            logger.info(f"Storing energy consumption dataset in s3 at {self.model_training_data_filepath}")
+            save_dataframe_to_s3_parquet(
+                bucket_name=self.bucket,
+                file_key=self.model_training_data_filepath,
+                df=data
+            )
+            return
+
+        return data
+
+    @staticmethod
+    def _prepare_epc(p: Property):
+        """
+        Build a copy of the EPC data of a property, in the format that is required for scoring with the kwh models.
+        In the backend we already perform some cleaning and transformation on an EPC, so here we only make sure that
+        the types and placeholder values are the ones the models expect
+        :param p: Instance of the property class, whose data attribute holds the EPC
+        :return: Copy of p.data, with the numeric fields cast to float, the boolean flags mapped to Y/N and missing
+            floor-level and floor-energy-eff values replaced with placeholders
+        """
+
+        epc = p.data.copy()
+
+        # Cast every populated numeric field to a float, leaving missing values as None
+        for field in (
+            'current-energy-efficiency',
+            'potential-energy-efficiency', 'environment-impact-current',
+            'environment-impact-potential', 'energy-consumption-current',
+            'energy-consumption-potential', 'co2-emissions-current',
+            'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
+            'lighting-cost-current', 'lighting-cost-potential',
+            'heating-cost-current', 'heating-cost-potential',
+            'hot-water-cost-current', 'hot-water-cost-potential',
+            'total-floor-area', 'multi-glaze-proportion',
+            'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
+            'low-energy-lighting', 'number-open-fireplaces',
+            'wind-turbine-count', 'unheated-corridor-length',
+            'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
+            'low-energy-fixed-light-count',
+        ):
+            value = epc[field]
+            if value is None:
+                continue
+            epc[field] = float(value)
+
+        # The flags can arrive as booleans, as None or already as Y/N. A missing flag is treated as "N"
+        flag_values = {True: "Y", False: "N", None: "N", "Y": "Y", "N": "N"}
+        for field in ('mains-gas-flag', 'flat-top-storey'):
+            epc[field] = flag_values[epc[field]]
+
+        # Fill in missing values with a placeholder
+        # NOTE(review): the two placeholders differ in their spacing - confirm that both match the values that the
+        # models were trained on
+        for field, placeholder in (("floor-level", "NODATA!"), ("floor-energy-eff", "NO DATA!")):
+            if pd.isnull(epc[field]):
+                epc[field] = placeholder
+
+        return epc
+
+    def prepare_epc(self, input_properties: list[Property]):
+        """
+        Build the dataframe that is used for scoring the kwh models, from a list of properties
+        :param input_properties: Instances of the property class to be scored
+        :return: Dataframe with one row per property, holding the prepared EPC along with the lodgement-year,
+            lodgement-month and an id column which is a copy of the uprn
+        """
+        prepared_epcs = [self._prepare_epc(p) for p in input_properties]
+        scoring_data = pd.DataFrame(prepared_epcs)
+
+        lodgement_dates = pd.to_datetime(scoring_data["lodgement-date"])
+        scoring_data["lodgement-year"] = lodgement_dates.dt.year
+        scoring_data["lodgement-month"] = lodgement_dates.dt.month
+
+        scoring_data["id"] = scoring_data["uprn"].copy()
+
+        return scoring_data
+
+    def convert_cost_to_today(self, original_cost, lodgement_date):
+        """
+        Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
+        (or as close to today as possible). The cost is scaled by the ratio between the tariff in the last row of the
+        retail price comparison data and the tariff on the date closest to the lodgement date
+        :param original_cost: The original energy cost
+        :param lodgement_date: The date the EPC was lodged
+        :return: The original cost, scaled to the latest available tariff
+        """
+        tariff_column = 'Average standard variable tariff (Large legacy suppliers)'
+        prices = self.retail_price_comparison
+
+        # Find the date in the price data that is closest to the lodgement date
+        closest_position = (prices['Date'] - lodgement_date).abs().argsort()[:1]
+        closest_date = pd.Timestamp(prices.iloc[closest_position]['Date'].values[0])
+
+        # Extract the tariff price on the closest date
+        tariff_at_lodgement = prices[prices['Date'] == closest_date][tariff_column].values[0]
+
+        # Extract the latest available tariff price, taken to be the one in the last row
+        latest_tariff = prices[tariff_column].iloc[-1]
+
+        # Scale the original cost by the change in the tariff
+        return original_cost * (float(latest_tariff) / float(tariff_at_lodgement))
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@ -132,49 +132,56 @@ def app():

    energy_consumption_data = []
    for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
-        # Skip the first 50
-        if i < 250:
-            continue
-
-        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
-        # Rename the columns to the same format as the api returns
-        data.columns = [c.replace("_", "-").lower() for c in data.columns]
-        # Take just date before the date threshold
-        data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
-
-        data = data[~pd.isnull(data["uprn"])]
-        # Take just the newest EPC per uprn, based on lodgement-date
-        data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
-
-        data = data.sample(sample_size)
-        # We use the addreess data to find the related information
-
-        collected_data = []
-        for _, property_data in data.iterrows():
-            time.sleep(np.random.uniform(0.3, 2))
-
-            uprn = int(property_data["uprn"])
-            address = property_data["address1"]
-            postcode = property_data["postcode"]
-            expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
-
-            response = retrieve_find_my_epc_data(
-                uprn=uprn,
-                postcode=postcode,
-                address=address,
-                expected_expiry_date=expected_expiry_date
-            )
-            if response is None:
+        try:
+            # Skip the first 256 directories
+            if i < 256:
                continue
-            collected_data.append(
-                {
-                    **response,
-                    "epc": property_data.to_dict(),
-                    "epc_directory": str(directory)
-                }
-            )

-        energy_consumption_data.extend(collected_data)
+            data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+            # Rename the columns to the same format as the api returns
+            data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
+            # Keep only EPCs lodged on or after the earliest date threshold
+            data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+            data = data[~pd.isnull(data["uprn"])]
+            # Take just the newest EPC per uprn, based on lodgement-date
+            data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
+
+            data = data.sample(sample_size, replace=False)
+            # We use the address data to find the related information
+
+            collected_data = []
+            for _, property_data in data.iterrows():
+                time.sleep(np.random.uniform(0.2, 1.5))
+
+                uprn = int(property_data["uprn"])
+                address = property_data["address1"]
+                postcode = property_data["postcode"]
+                expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
+
+                response = retrieve_find_my_epc_data(
+                    uprn=uprn,
+                    postcode=postcode,
+                    address=address,
+                    expected_expiry_date=expected_expiry_date
+                )
+                if response is None:
+                    continue
+                collected_data.append(
+                    {
+                        **response,
+                        "epc": property_data.to_dict(),
+                        "epc_directory": str(directory)
+                    }
+                )
+
+            energy_consumption_data.extend(collected_data)
+        except Exception as e:
+            print(f"Error for directory {directory}: {e}")
+            # If we have an error, then we wait for a bit since it's likely due to timeout
+            time.sleep(300)
+            continue

    # Store the pickle in s3
    save_time = datetime.now()
--- a/etl/bill_savings/data_combining.py
+++ b/etl/bill_savings/data_combining.py
@ -1,104 +0,0 @@
-import re
-from datetime import datetime
-from tqdm import tqdm
-
-import pandas as pd
-
-from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
-
-# These columns we co-erce to strings before saving
-PROBLEMATIC_COLUMNS = ["main-heating-controls", "floor-level"]
-
-
-def extract_kwh_value(text):
-    """
-    Extract the numerical kWh value from a given string.
-
-    :param text: The input string containing the kWh value.
-    :return: The extracted numerical kWh value as an integer.
-    """
-    # Use regular expression to find the numerical value followed by "kWh per year"
-    match = re.search(r'([\d,]+) kWh per year', text)
-
-    if match:
-        # Remove commas from the extracted value and convert to integer
-        kwh_value = int(match.group(1).replace(',', ''))
-        return kwh_value
-    else:
-        # If no match is found, return None or raise an exception
-        return None
-
-
-def app():
-    """
-    Given the files written in our datalake in s3, this application will collate the data into a single file
-    and store it back in s3 for analysis
-    :return:
-    """
-
-    # Firstly, list all of the saved files in s3
-    data_files = list_files_in_s3_folder(bucket_name="retrofit-datalake-dev", folder_name="energy_consumption_data")
-
-    run_date = datetime.now().strftime("%Y-%m-%d")
-
-    complete_data = []
-    for files in tqdm(data_files):
-        dataset_run_date = files.split("/")[-1].split(".")[0]
-        # Extract the date from the file name
-        dataset_run_date = pd.Timestamp(dataset_run_date)
-
-        # Load the data from the file
-        data = read_pickle_from_s3(bucket_name="retrofit-datalake-dev", s3_file_name=files)
-
-        # We check that the retrieved energy consumption sufficiently matches the EPC data
-        internal_dataset = []
-        for x in data:
-            epc_data = x["epc"]
-            epc_sap = epc_data["current-energy-efficiency"]
-            epc_potential_sap = epc_data["potential-energy-efficiency"]
-            # Make sure this matches the extracted sap
-            if int(epc_sap) != int(x["current_epc_efficiency"]) or int(epc_potential_sap) != int(
-                x["potential_epc_efficiency"]
-            ):
-                continue
-
-            heating_kwh = extract_kwh_value(x["heating_text"])
-            hot_water_kwh = extract_kwh_value(x["hot_water_text"])
-            internal_dataset.append(
-                {
-                    **epc_data,
-                    "heating_kwh": heating_kwh,
-                    "hot_water_kwh": hot_water_kwh,
-                    "dataset_run_date": dataset_run_date
-                }
-            )
-
-        complete_data.extend(internal_dataset)
-
-    df = pd.DataFrame(complete_data)
-    # Because we collate multiple runs into a single data source, it's possible that we have duplicated data at
-    # the uprn level, so we dedupe based on the newest dataset_run_date
-
-    df = df.sort_values("dataset_run_date", ascending=False).drop_duplicates(subset="uprn", keep="first")
-    df = df.drop(columns=["dataset_run_date"])
-
-    for col in PROBLEMATIC_COLUMNS:
-        df[col] = df[col].astype(str)
-
-    # Save the data back to s3, but this time as a parquet file
-    save_dataframe_to_s3_parquet(
-        bucket_name="retrofit-data-dev",
-        file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet",
-        df=df
-    )
-
-    # We also estimate the energy consumption reduction from this data, by band
-    df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
-    consumption_averages = df.groupby("current-energy-rating")["total_consumption"].meam().reset_index()
-
-    # Save the consumption averages back to s3
-    save_dataframe_to_s3_parquet(
-        bucket_name="retrofit-data-dev",
-        file_key=f"energy_consumption/{run_date}/consumption_averages.parquet",
-        df=consumption_averages
-    )
--- a/etl/bill_savings/training.py
+++ b/etl/bill_savings/training.py
@ -1,57 +0,0 @@
-from pprint import pprint
-import msgpack
-from utils.s3 import read_from_s3
-from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
-
-
-def handler():
-    """
-    This function is used to train the model and store the final models in s3 as pickles
-    :return:
-    """
-
-    dataset_version = "2024-07-08"
-
-    # Usage:
-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
-    )
-
-    cleaned = msgpack.unpackb(cleaned, raw=False)
-
-    model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
-    model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet')
-    model.feature_engineering()
-    model.save_dummy_schema(dataset_version=dataset_version)
-
-    # For heating_kwh
-    model.split_dataset(target='heating_kwh')
-    model.fit_model(target='heating_kwh')
-    model.re_train_final_model(target='heating_kwh')
-    evaluation_results = model.evaluate_model(target='heating_kwh')
-
-    pprint(evaluation_results["train"])
-    pprint(evaluation_results["test"])
-
-    model.save_model(target='heating_kwh', dataset_version=dataset_version)
-
-    # importance_df = evaluation_results["train"]["Feature Importance"]
-    # testing_predictions = model.testing_predictions["heating_kwh"]
-    # testing_predictions = testing_predictions.sort_values("residual", ascending=False)
-    # training_predictions = model.training_predictions["heating_kwh"]
-    # training_predictions = training_predictions.sort_values("residual", ascending=False)
-    # # Merge on model.input_data, by the index
-    # merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True)
-    # merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True)
-
-    # For hot_water_kwh
-    model.split_dataset(target='hot_water_kwh')
-    model.fit_model(target='hot_water_kwh')
-    model.re_train_final_model(target='hot_water_kwh')
-    evaluation_results = model.evaluate_model(target='hot_water_kwh')
-
-    pprint(evaluation_results["train"])
-    pprint(evaluation_results["test"])
-
-    model.save_model(target='hot_water_kwh', dataset_version=dataset_version)
--- a/etl/bill_savings/training_data.py
+++ b/etl/bill_savings/training_data.py
@ -0,0 +1,24 @@
+import msgpack
+from etl.bill_savings.KwhData import KwhData
+from utils.s3 import read_from_s3
+
+
+def app():
+    """
+    Loads the cleaned EPC data from s3 and runs the KwhData pipeline over the datalake bucket: combine(), followed
+    by transform() with save=True.
+    NOTE(review): this was previously described as collating the files written to the datalake into a single file
+    and storing it back in s3 for analysis - confirm against KwhData
+    :return:
+    """
+
+    # The cleaned EPC data is read as raw bytes and then decoded with msgpack
+    cleaned_bytes = read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+    )
+    cleaned = msgpack.unpackb(cleaned_bytes, raw=False)
+
+    # If there is any problematic data, it could be:
+    # s3://retrofit-datalake-dev/energy_consumption_data/2024-08-10 18:48:06.866647.pkl
+    kwh_data_client = KwhData(bucket="retrofit-datalake-dev")
+    kwh_data_client.combine()
+    # transform is given the data held on the client after combine() has run
+    kwh_data_client.transform(data=kwh_data_client.data, cleaned=cleaned, save=True)
--- a/etl/customers/bcc_tender/app.py
+++ b/etl/customers/bcc_tender/app.py
@ -0,0 +1,211 @@
+"""
+This script prepares some data for the Birmingham City Council tender
+"""
+import pandas as pd
+import numpy as np
+
+epc_data = pd.read_csv("local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv")
+
+# Broad assumptions
+# Around 67% of homes in the UK have an EPC; to be conservative with our estimates, we round up to 70%:
+# https://www.ons.gov.uk/peoplepopulationandcommunity/housing/articles/energyefficiencyofhousinginenglandandwales/2023
+# However, we have 322128 homes in Birmingham with an EPC, which is 76% of the total number of homes in Birmingham
+# based on the 2021 census, which put this figure at 423,500 homes
+PROPORTION_OF_HOMES_WITH_AN_EPC = 0.761
+N_HOUSEHOLDS_IN_BIRMINGHAM = 423_500
+N_HOMES_WITHOUT_AN_EPC = 423_500 - 322128
+
+# 55% of households are recipients of benefits in the West Midlands
+# (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
+PROPORTION_OF_HOMES_ON_BENEFITS = 0.55
+
+# https://www.justgroupplc.co.uk/~/media/Files/J/Just-Retirement-Corp/news-doc/2023/six-in-10-homeowners-eligible-for
+# -benefits-failing-to-claim-just-group-annual-insight-report.pdf
+PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS = 0.106
+
+# Breakdown of properties in council tax bands in the UK, to give us an estimate of the number of properties in A-D
+band_a_proportion = 0.239
+band_b_proportion = 0.195
+band_c_proportion = 0.219
+band_d_proportion = 0.156
+COUNCIL_TAX_BAND_A_TO_D_PROPORTION = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
+
+# Get the newest record, based on lodgment datetime, by uprn
+epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
+epc_data = epc_data.sort_values(["LODGEMENT_DATETIME"], ascending=False).drop_duplicates("UPRN")
+
+# We want to figure out the number of properties that are eligible for ECO/GBIS funding
+
+social_tenures = ["Rented (social)", "rental (social)"]
+owner_occupied_tenures = ["Owner-occupied", "owner-occupied"]
+prs_tenures = ["Rented (private)", "rental (private)"]
+
+# If social tenure, then as long as the property is EPC D-G, it's eligible
+epc_data["eligibility_type"] = None
+
+# Eligibility 1: ECO4 help to heat group OO - tenure is owner occupied and EPC rating D-G
+epc_data["eligibility_type"] = np.where(
+    (
+        epc_data["TENURE"].isin(owner_occupied_tenures) &
+        epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"]) &
+        pd.isnull(epc_data["eligibility_type"])
+    ),
+    "eco4_oo_hthg_needs_scaling_on_benefits",
+    epc_data["eligibility_type"]
+)
+
+# Eligibility 2: ECO4 help to heat group PRS - tenure is private rental and EPC rating E-G
+epc_data["eligibility_type"] = np.where(
+    (
+        epc_data["TENURE"].isin(prs_tenures) &
+        epc_data["CURRENT_ENERGY_RATING"].isin(["E", "F", "G"]) &
+        pd.isnull(epc_data["eligibility_type"])
+    ),
+    "eco4_prs_hthg_needs_scaling_on_benefits",
+    epc_data["eligibility_type"]
+)
+
+# Eligibility 3: ECO4 Social housing - tenure is social rented and EPC rating D-G
+epc_data["eligibility_type"] = np.where(
+    (
+        epc_data["TENURE"].isin(social_tenures) &
+        epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"]) &
+        pd.isnull(epc_data["eligibility_type"])
+    ),
+    "eco4_social_housing",
+    epc_data["eligibility_type"]
+)
+
+# Eligibility 4: GBIS General Eligibility, OO - tenure is owner occupied and EPC rating D-G
+# This is a subset of Eligibility 1. We scale eco4_oo_hthg_needs_scaling based on the % of properties on benefits.
+# Of the properties left over that are deemed not eligible, a % will be eligible for GBIS via Eligibility 4:
+# any properties that fall out of Eligibility 1 fall into Eligibility 4 in proportion to the % of units
+# being in council tax bands A-D
+
+# Eligibility 5: GBIS General Eligibility, PRS - tenure is private rental and EPC rating D-G
+# Additionally, some units that fall out of Eligibility 2 will be eligible for GBIS via Eligibility 5, via the same
+# mechanism as Eligibility 4. We handle this later
+epc_data["eligibility_type"] = np.where(
+    (
+        epc_data["TENURE"].isin(prs_tenures) &
+        epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"]) &
+        pd.isnull(epc_data["eligibility_type"])
+    ),
+    "gbis_prs_ge_needs_scaling_on_council_tax_band",
+    epc_data["eligibility_type"]
+)
+
+# Example EPCs to analyse
+analysis_epcs = epc_data[~pd.isnull(epc_data["eligibility_type"])].copy()
+# Keep just columns we need
+analysis_epcs = analysis_epcs[
+    [
+        "UPRN", "TENURE", "CURRENT_ENERGY_RATING", "WALLS_DESCRIPTION", "ROOF_DESCRIPTION",
+        "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA", "PROPERTY_TYPE", "BUILT_FORM", "MAINHEAT_DESCRIPTION",
+        "eligibility_type",
+    ]
+]
+analysis_epcs["grouped_epc_band"] = np.where(
+    analysis_epcs["CURRENT_ENERGY_RATING"].isin(["D"]),
+    "EPC D",
+    "EPC E-G"
+)
+analysis_epcs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/bcc tender/analysis_epcs.csv", index=False)
+
+# Create aggregations and we store this information, one csv per column.
+# Both groups of columns are aggregated here. Previously a second assignment to agg_cols immediately overwrote the
+# first, so the rating / age band / property type / built form / grouped_epc_band columns were never aggregated
+agg_cols = [
+    "CURRENT_ENERGY_RATING", "CONSTRUCTION_AGE_BAND", "PROPERTY_TYPE", "BUILT_FORM", "grouped_epc_band",
+    "WALLS_DESCRIPTION", "ROOF_DESCRIPTION", "MAINHEAT_DESCRIPTION",
+]
+for col in agg_cols:
+    # Count the properties per distinct value of the column
+    agg_df = analysis_epcs.groupby([col]).size().reset_index(name="Number of Properties")
+    # The percentage is relative to the properties counted for this column
+    agg_df["Percentage of Properties"] = 100 * agg_df["Number of Properties"] / agg_df["Number of Properties"].sum()
+    agg_df.to_csv(f"/Users/khalimconn-kowlessar/Documents/hestia/Customers/bcc tender/{col}.csv", index=False)
+
+# Eligibility 6: GBIS General Eligibility, Social - tenure is social rented and EPC rating D-G, but also the property
+# should be rented out below market rate
+# This is a subset of Eligibility 3 - we likely don't need to do any scaling
+
+n_eco4_oo_hthg_needs_scaling_on_benefits = epc_data[
+    epc_data["eligibility_type"] == "eco4_oo_hthg_needs_scaling_on_benefits"
+    ].shape[0]
+
+n_eco4_prs_hthg_needs_scaling_on_benefits = epc_data[
+    epc_data["eligibility_type"] == "eco4_prs_hthg_needs_scaling_on_benefits"
+    ].shape[0]
+
+n_eco4_social = epc_data[
+    epc_data["eligibility_type"] == "eco4_social_housing"
+    ].shape[0]
+
+n_gbis_prs_ge_needs_scaling_on_council_tax_band = epc_data[
+    epc_data["eligibility_type"] == "gbis_prs_ge_needs_scaling_on_council_tax_band"
+    ].shape[0]
+
+# We're going to make the broad assumption that all homeowners claiming benefits live in homes in council tax
+# bands A-D. Therefore there are no additional homes in eligibility 4 and 5
+
+# n_eligibility_1 = np.floor(n_eco4_oo_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS)
+n_eligibility_1 = np.floor(n_eco4_oo_hthg_needs_scaling_on_benefits * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
+
+# n_eligibility_2 = np.floor(n_eco4_prs_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMES_ON_BENEFITS)
+n_eligibility_2 = np.floor(n_eco4_prs_hthg_needs_scaling_on_benefits * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
+
+n_eligiblity_3 = n_eco4_social
+
+# We subtract the number of homes in eligibility 1, from the number of homes under ECO4 OO, HTHG, before scaling on
+# benefits. This gives us the number of homes that were not on benefits. We then scale this number based on the % of
+# homes in council tax bands A-D
+# n_eligiblity_4 = np.floor(
+#     (n_eco4_oo_hthg_needs_scaling_on_benefits - n_eligibility_1) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION
+# )
+
+# We also need to add on homes that fall out of eligibility 2
+n_eligibiltiy_5 = np.floor(
+    np.floor(n_gbis_prs_ge_needs_scaling_on_council_tax_band * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
+    # np.floor((n_eco4_prs_hthg_needs_scaling_on_benefits - n_eligibility_2) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
+)
+
+# We don't scale up the # of homes based on % of homes with an EPC, because
+n_owner_occupied = epc_data[epc_data["TENURE"].isin(owner_occupied_tenures)].shape[0]
+oo_eligibility = n_eligibility_1
+
+# 68% of owner occupied homes are eligible
+proportion_of_oo_eligible = oo_eligibility / n_owner_occupied
+# We then use this % on the rest of the homes in Birmingham that do not have an EPC
+oo_eligible_without_an_epc = np.floor(N_HOMES_WITHOUT_AN_EPC * proportion_of_oo_eligible)
+oo_eligibility = oo_eligibility + oo_eligible_without_an_epc
+
+# All private rentals require an EPC
+prs_eligibility = (n_eligibility_2 + n_eligibiltiy_5)
+# Most social housing properties will have an EPC so we don't scale this up
+social_eligibility = n_eligiblity_3
+
+# We scale this up since this number is based on the number of homes in Birmingham with an EPC, and we want to
+# estimate the total number of homes in Birmingham
+total_eligible = oo_eligibility + prs_eligibility + social_eligibility
+
+proportion_of_homes_eligibile = total_eligible / N_HOUSEHOLDS_IN_BIRMINGHAM
+# Approx 53% of homes in Birmingham are eligible for ECO/GBIS funding
+
+# Approximately 53% of Homes are eligible for some form of ECO4 or GBIS funding, 227k homes
+# This is broken down as follows:
+# - 155k owner occupiers
+# - 33k private rentals
+# - 39k social housing
+
+# We can't seem to identify the properties owned by the council in the company ownership data, because what is the
+# entity that owns the property? Is it the council, or is it a company that is owned by the council? We can't be sure
+# and so since BCC owns 54,000 social housing properties (5k) supported housing
+# [https://www.birmingham.gov.uk/info/50094/housing_options/2686/apply_for_social_housing#:~:text=We%20manage
+# %20around%2054%2C000%20social,a%20member%20of%20your%20household.]
+# and there are 78,410 social housing properties in Birmingham, we can assume that the council owns 54,000 of these
+# and so 69% of the social housing is owned by the Council
+
+# Since we saw that 38,779 of 78,410 social housing looked to be able to benefit from ECO/GBIS funding, we can assume
+# that 69% of these are owned by the council, which is 26,757 properties
+
+# So, with these assumptions in mind:
+# We can commit to [x] per annum based on your 54k council-owned, of which approximately 27k are likely to be eligible
+# for some form of ECO/GBIS funding. We will work directly with Housing associations to address the remaining 12k
+# social properties that may be eligible for funding through ECO/GBIS.
+# We will market directly to the 33k private rentals and 155k owner occupiers that are eligible for funding,
+# and assuming a 5% conversion, will aim to complete work on
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@ -11,7 +11,10 @@ from utils.s3 import read_dataframe_from_s3_parquet
 # The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
 # Therefore value_of_F * 1.15 = value_of_D * 1.03
 # Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
-PROPERTY_VALUE_ESTIMATE = 213_165
+PROPERTY_VALUE_ESTIMATE = 200_000
+
+# UPRNs of properties we need
+MANUAL_EXCLUSIONS = []


 def aggregate_matches(matching_lookup, company_ownership, properties):
@ -72,10 +75,15 @@ def find_f_g_properties(paths):
        epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
        epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)

-        # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
-        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
+        if pd.isnull(pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")).sum():
+            raise Exception("wtf")

-        epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+        # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
+
+        epc_data = epc_data.sort_values(
+            ["LODGEMENT_DATE", "LODGEMENT_DATETIME"], ascending=False
+        ).drop_duplicates("UPRN")

        # Get G & F properties
        epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
@ -84,7 +92,7 @@ def find_f_g_properties(paths):
    data = pd.concat(data)

    # Save as an excel
-    data.to_excel("EPC F & G Properties.xlsx", index=False)
+    data.to_excel("EPC F & G Properties - V2.xlsx", index=False)


 def remove_text_in_brackets(address: str) -> str:
@ -196,7 +204,7 @@ def remove_duplicate_matches(matching_lookup, properties, company_ownership):
            matches_to_drop[["UPRN", "Title Number"]].copy()
        )

-    to_drop = pd.concat(to_drop)
+    to_drop = pd.concat(to_drop) if to_drop else pd.DataFrame()

    if not to_drop.empty:
        merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
@ -245,6 +253,74 @@ def remove_duplicate_uprn_matches(matching_lookup, properties, company_ownership
    return matching_lookup


+def filter_land_registry(properties):
+    """
+    Filters the land registry price paid data down to the postcodes found in properties, keeping transactions
+    dated 2019-01-01 or later, and writes the result to a local csv
+    :param properties: Dataframe of properties, with a POSTCODE column
+    :return:
+    """
+    # The file is read without a header row, so we name the columns ourselves
+    column_names = [
+        "transaction_id",
+        "price",
+        "date_of_transfer",
+        "postcode",
+        "property_type",
+        "old_new",
+        "duration",
+        "paon",
+        "saon",
+        "street",
+        "locality",
+        "town_city",
+        "district",
+        "county",
+        "ppd_category_type",
+        "record_status",
+    ]
+    land_registry = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/pp-complete.csv", header=None)
+    land_registry.columns = column_names
+
+    # Keep only transactions in postcodes that we have properties for (compared in lower case)
+    transaction_postcodes = land_registry["postcode"].str.lower()
+    property_postcodes = properties["POSTCODE"].str.lower().unique()
+    land_registry = land_registry[transaction_postcodes.isin(property_postcodes)]
+
+    # NOTE(review): values that do not match this format are coerced to NaT and are then dropped by the date
+    # filter below - confirm the file's dates carry no time component
+    land_registry["date_of_transfer"] = pd.to_datetime(
+        land_registry["date_of_transfer"], format="%Y-%m-%d", errors="coerce"
+    )
+    # Take data from the last 5 years
+    is_recent = land_registry["date_of_transfer"] >= "2019-01-01"
+    land_registry = land_registry[is_recent]
+
+    # Store the filtered data
+    land_registry.to_csv(
+        "/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv", index=False
+    )
+
+
+def is_substring(x, match_string):
+    """
+    Checks whether x appears inside the lower-cased match_string
+    :param x: The value to look for. It is used as-is, so presumably callers pass it already lower-cased
+    :param match_string: The string to search in, lower-cased before the check
+    :return: False if x is null, otherwise whether x is contained in match_string.lower()
+    """
+    return (not pd.isnull(x)) and x in match_string.lower()
+
+
+def house_number_match(paon, house_number):
+    """
+    Checks whether a land registry paon and a house number refer to the same number
+    :param paon: The paon value from the land registry data
+    :param house_number: The house number to compare against
+    :return: True if both values convert to the same integer or, when either cannot be converted, if they are equal
+    """
+    # Firstly try and convert to numeric, so that e.g. "12" and 12 are treated as the same house number
+    try:
+        return int(paon) == int(house_number)
+    except (TypeError, ValueError, OverflowError):
+        # If we can't convert both to numeric (e.g. "12A", None or NaN), we do an equality
+        return paon == house_number
+
+
+def check_equalities(lr_filtered):
+    """
+    Checks whether every row of the filtered land registry data agrees with the first row on paon, saon and street
+    :param lr_filtered: Dataframe with paon, saon and street columns
+    :return: Tuple of (all_paon_equal, all_saon_equal, all_street_equal)
+    """
+    def matches_first(column_name):
+        column = lr_filtered[column_name]
+        return all(column == column.values[0])
+
+    all_paon_equal = matches_first("paon")
+
+    saon = lr_filtered["saon"]
+    # A null never compares equal to itself, so when the first saon is missing we require every saon to be missing
+    saon_agrees = pd.isnull(saon) if pd.isnull(saon.values[0]) else saon == saon.values[0]
+    all_saon_equal = all(saon_agrees)
+
+    all_street_equal = matches_first("street")
+
+    return all_paon_equal, all_saon_equal, all_street_equal
+
+
 def app():
    """
    This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
@ -254,8 +330,8 @@ def app():
    #       https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
    #       =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
    #       is actually listed in two local authorities causing us to think it's an EPC F & G property, but it's
-    #       it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating together
-    #       and performing a singular filter for most recent EPC by UPRN
+    #       actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating
+    #       together and performing a singular filter for most recent EPC by UPRN
    # paths = [
    #     "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
    #     "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
@ -293,17 +369,19 @@ def app():
    # paths = list(set(paths))
    # find_f_g_properties(paths)

-    properties = pd.read_excel("EPC F & G Properties.xlsx")
-    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+    properties = pd.read_excel("EPC F & G Properties - V2.xlsx")
+    # filter_land_registry(properties)
+    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv")
    company_ownership["is_overseas"] = False
-    overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv")
+    overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv")
    overseas_company_ownership["is_overseas"] = True

    company_ownership = pd.concat([company_ownership, overseas_company_ownership])

    # FIlter on relevant postcodes
    company_ownership = company_ownership[
-        company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
+        company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())
+    ]

    # Now we filter properties the other way around
    properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
@ -328,6 +406,8 @@ def app():
            ~company_ownership["Property Address"].str.lower().str.startswith(starting_term)
        ]

+    # address = properties[properties["UPRN"] == 100030253055].squeeze()
+
    freehold_matching_lookup = []  # 634
    leasehold_matching_lookup = []  # 86
    shared_leasehold_match = []
@ -414,13 +494,16 @@ def app():

    freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
    leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
-    shared_leasehold_match = pd.concat(shared_leasehold_match)
-    shared_freehold_match = pd.concat(shared_freehold_match)

-    # freehold_matching_lookup.to_excel("freehold_matching_lookup_new.xlsx")
-    # leasehold_matching_lookup.to_excel("leasehold_matching_lookup_new.xlsx")
-    # shared_leasehold_match.to_excel("shared_leasehold_match_new.xlsx")
-    # shared_freehold_match.to_excel("shared_freehold_match_new.xlsx")
+    # freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx")
+    # leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx")
+    # freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
+    # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")
+
+    # freehold_matching_lookup.shape
+    # (1537, 4)
+    # leasehold_matching_lookup.shape
+    # (390, 4)

    # The approximate matches aren't very good
    freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
@ -428,24 +511,313 @@ def app():

    # Combine
    combined_matching_lookup = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
+
    # Remove duplicates
-    combined_matching_lookup = remove_duplicate_matches(combined_matching_lookup, properties, company_ownership)
+    combined_matching_lookup = remove_duplicate_matches(
+        matching_lookup=combined_matching_lookup, properties=properties, company_ownership=company_ownership
+    )
    # We also have duplicates at a UPRN level
    combined_matching_lookup = remove_duplicate_uprn_matches(combined_matching_lookup, properties, company_ownership)

-    # There are some cases where we have duplicates
-    # freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
-    # leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
-
    matched_addresses = combined_matching_lookup.merge(
-        properties[["UPRN", "ADDRESS", "CURRENT_ENERGY_EFFICIENCY", "CURRENT_ENERGY_RATING"]].rename(
-            columns={"ADDRESS": "epc_address"}),
+        properties[
+            [
+                "UPRN",
+                "ADDRESS",
+                "ADDRESS1",
+                "CURRENT_ENERGY_EFFICIENCY",
+                "CURRENT_ENERGY_RATING",
+                "POSTCODE",
+                "LODGEMENT_DATE",
+                "TRANSACTION_TYPE"
+            ]
+        ].rename(
+            columns={
+                "ADDRESS": "epc_address",
+                "ADDRESS1": "epc_address1",
+                "POSTCODE": "epc_postcode"
+            }
+        ),
        how="left", on="UPRN"
    ).merge(
-        company_ownership[["Title Number", "Property Address", "Company Registration No. (1)", "Proprietor Name (1)"]],
+        company_ownership[
+            [
+                "Title Number",
+                "Property Address",
+                "Postcode",
+                "Company Registration No. (1)",
+                "Proprietor Name (1)",
+                "Date Proprietor Added",
+            ]
+        ],
        how="left", on="Title Number"
    )

+    # Let's try and get the house number
+    matched_addresses["house_number"] = (
+        matched_addresses["epc_address"]
+        .apply(remove_text_in_brackets)
+        .apply(SearchEpc.get_house_number)
+        .str.lower()
+        .str.replace(",", "")
+    )
+
+    # Read in land registry
+    land_registry = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv",
+    )
+
+    # We now perform a match between the land registry data and the matched address, in an attempt to find
+    # out when these properties last sold. The land registry data has been pre filtered on the postcodes in this
+    # data, and for sales within the last 5 years, to ensure the file isn't too large.
+
+    land_registry["postcode"] = land_registry["postcode"].str.lower().str.strip()
+    land_registry["street"] = land_registry["street"].str.lower().str.strip()
+    land_registry["paon"] = land_registry["paon"].str.lower().str.strip()
+    land_registry["saon"] = land_registry["saon"].str.lower().str.strip()
+    land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"])
+
+    land_registry_matches = []
+    for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)):
+        # Filter land registry on the postcode
+        lr_filtered = land_registry[
+            (land_registry["postcode"] == match["epc_postcode"].lower().strip())
+        ]
+
+        # Filter further, when the street is in the address
+        # street should be contained in epc_address
+        lr_filtered = lr_filtered[
+            lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower())) |
+            lr_filtered["street"].apply(lambda x: is_substring(x, match["Property Address"].lower()))
+            ]
+
+        if lr_filtered.empty:
+            continue
+
+        # We now check if paon is in address 1
+        lr_filtered["paon_match"] = lr_filtered["paon"].apply(lambda x: house_number_match(x, match["house_number"]))
+        # We also try the secondary match
+        lr_filtered["saon_match"] = (
+            lr_filtered["saon"].apply(
+                lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address1"])
+            )
+        )
+        # We filter where we have a primary or secondary match
+        lr_filtered = lr_filtered[
+            lr_filtered["paon_match"] | lr_filtered["saon_match"]
+            ]
+
+        if lr_filtered.empty:
+            continue
+        elif lr_filtered.shape[0] == 1:
+            land_registry_matches.append(
+                {
+                    "uprn": match["UPRN"],
+                    "transaction_id": lr_filtered['transaction_id'].values[0],
+                    "price": lr_filtered["price"].values[0],
+                    "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                }
+            )
+            continue
+        elif lr_filtered.shape[0] > 1:
+            # We make sure all records are the same and take the newest
+            all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
+            has_paon_match = any(lr_filtered["paon_match"])
+
+            if all_paon_equal and all_street_equal and all_saon_equal:
+                # Take the newest record, append and continue
+                lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                lr_filtered = lr_filtered.head(1)
+                land_registry_matches.append(
+                    {
+                        "uprn": match["UPRN"],
+                        "transaction_id": lr_filtered['transaction_id'].values[0],
+                        "price": lr_filtered["price"].values[0],
+                        "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                    }
+                )
+                continue
+            elif has_paon_match and all_street_equal:
+                # Perform filter on paon
+                lr_filtered = lr_filtered[lr_filtered["paon_match"]]
+                # Do an additional equality check
+                all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
+                if all_paon_equal and all_street_equal and all_saon_equal:
+                    lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                    lr_filtered = lr_filtered.head(1)
+                    land_registry_matches.append(
+                        {
+                            "uprn": match["UPRN"],
+                            "transaction_id": lr_filtered['transaction_id'].values[0],
+                            "price": lr_filtered["price"].values[0],
+                            "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                        }
+                    )
+                else:
+                    # We do a match on saon
+                    lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
+                        lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address"])
+                    )
+
+                    lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
+
+                    if lr_filtered.empty:
+                        continue
+                    elif lr_filtered.shape[0] == 1:
+                        land_registry_matches.append(
+                            {
+                                "uprn": match["UPRN"],
+                                "transaction_id": lr_filtered['transaction_id'].values[0],
+                                "price": lr_filtered["price"].values[0],
+                                "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                            }
+                        )
+                        continue
+                    else:
+                        raise NotImplementedError("wtf")
+            else:
+                # We have a final check, based on an observed case
+                lr_address_1 = " ".join([x.lower().strip() for x in match["Property Address"].split(",")[0:2]])
+
+                lr_filtered["paon_match2"] = lr_filtered["paon"].apply(
+                    lambda x: False if pd.isnull(x) else is_substring(x, lr_address_1)
+                )
+
+                lr_filtered = lr_filtered[lr_filtered["paon_match2"]]
+
+                if lr_filtered.empty:
+                    continue
+                elif lr_filtered.shape[0] == 1:
+                    land_registry_matches.append(
+                        {
+                            "uprn": match["UPRN"],
+                            "transaction_id": lr_filtered['transaction_id'].values[0],
+                            "price": lr_filtered["price"].values[0],
+                            "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                        }
+                    )
+                    continue
+                else:
+                    # Check all the same
+                    all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
+
+                    # Check saon is house number with exact match
+                    lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
+                        lambda x: False if pd.isnull(x) else house_number_match(x, match["house_number"])
+                    )
+                    # We check if we have a flat
+                    match_flat_number = re.match("flat (\d+)", match["epc_address1"].lower())
+                    match_apartment_number = re.match("apartment (\d+)", match["epc_address1"].lower())
+                    lr_filtered["saon_match3"] = False
+                    if match_flat_number is not None:
+                        # Get out the match
+                        match_flat_number = "flat " + match_flat_number.group(1)
+                        lr_filtered["saon_match3"] = lr_filtered["saon"].apply(
+                            lambda x: False if pd.isnull(x) else x == match_flat_number
+                        )
+
+                    if match_apartment_number is not None:
+                        # Get out the match
+                        match_apartment_number = "apartment " + match_apartment_number.group(1)
+                        lr_filtered["saon_match3"] = lr_filtered["saon"].apply(
+                            lambda x: False if pd.isnull(x) else x == match_apartment_number
+                        )
+
+                    if all_paon_equal and all_saon_equal and all_street_equal:
+                        # Take the newest record
+                        lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                        lr_filtered = lr_filtered.head(1)
+                        land_registry_matches.append(
+                            {
+                                "uprn": match["UPRN"],
+                                "transaction_id": lr_filtered['transaction_id'].values[0],
+                                "price": lr_filtered["price"].values[0],
+                                "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                            }
+                        )
+                        continue
+                    elif any(lr_filtered["saon_match2"]):
+                        lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
+                        all_saon_equal, all_paon_equal, all_street_equal = check_equalities(lr_filtered)
+                        if all_paon_equal and all_saon_equal and all_street_equal:
+                            # Filter on the newest record
+                            lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                            lr_filtered = lr_filtered.head(1)
+                        if lr_filtered.shape[0] == 1:
+                            land_registry_matches.append(
+                                {
+                                    "uprn": match["UPRN"],
+                                    "transaction_id": lr_filtered['transaction_id'].values[0],
+                                    "price": lr_filtered["price"].values[0],
+                                    "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                                }
+                            )
+                            continue
+                    elif any(lr_filtered["saon_match3"]):
+                        lr_filtered = lr_filtered[lr_filtered["saon_match3"]]
+                        if lr_filtered.shape[0] == 1:
+                            land_registry_matches.append(
+                                {
+                                    "uprn": match["UPRN"],
+                                    "transaction_id": lr_filtered['transaction_id'].values[0],
+                                    "price": lr_filtered["price"].values[0],
+                                    "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                                }
+                            )
+                            continue
+
+                    raise NotImplementedError("wtf")
+        else:
+            raise NotImplementedError("What happened here?")
+
+    land_registry_matches = pd.DataFrame(land_registry_matches)
+    # land_registry_matches.to_excel("land_registry_matches.xlsx")
+
+    # Check the matches against the addresses
+    # lr_to_addresses = matched_addresses[
+    #     ["UPRN", "epc_address", "epc_postcode", "Property Address", "Postcode"]
+    # ].merge(
+    #     land_registry_matches,
+    #     how="inner",
+    #     left_on="UPRN",
+    #     right_on="uprn"
+    # ).drop(columns=["uprn"]).merge(
+    #     land_registry[["transaction_id", "paon", "saon", "street", "postcode"]],
+    #     how="left", on="transaction_id"
+    # )
+
+    # Merge onto matched addresses
+    matched_addresses = matched_addresses.merge(
+        land_registry_matches,
+        how="left",
+        left_on="UPRN",
+        right_on="uprn"
+    ).drop(columns=["uprn"])
+
+    # Flag anything that sold in the last year
+    matched_addresses["sold_recently"] = (
+        matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1)
+    )
+
+    matched_addresses["sale_lodged_recently"] = (
+        (pd.to_datetime(matched_addresses["LODGEMENT_DATE"]) >= pd.Timestamp.now() - pd.DateOffset(months=12)) &
+        (matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"]))
+    )
+
+    # Save this
+    # matched_addresses.to_excel("combined_aggregate - pre filter 28th July.xlsx", index=False)
+
+    # Drop rows on the booleans
+    matched_addresses = matched_addresses[
+        ~matched_addresses["sold_recently"] &
+        ~matched_addresses["sale_lodged_recently"]
+        ]
+
+    # Filter combined_matching_lookup accordingly
+    combined_matching_lookup = combined_matching_lookup[
+        combined_matching_lookup["UPRN"].isin(matched_addresses["UPRN"])
+    ]
+
    # shared_freehold_match = pd.DataFrame(shared_freehold_match)
    # Strore these files
    # freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
@ -457,33 +829,28 @@ def app():
    # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup.xlsx")
    # shared_leasehold_match = pd.read_excel("shared_leasehold_match.xlsx")

-    freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
-    leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
+    # freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
+    # leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)

    combined_aggregate = aggregate_matches(
-        combined_matching_lookup, company_ownership, properties
+        matching_lookup=combined_matching_lookup,
+        company_ownership=company_ownership,
+        properties=properties
    )

-    investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
    investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]

-    investment_20m_properties = matched_addresses[
-        matched_addresses["Company Registration No. (1)"].isin(investment_20m["Company Registration No. (1)"])
-    ]
-
    investment_50m_properties = matched_addresses[
        matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. (1)"])
    ]

    portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])]
-    portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])]

-    investment_20m_properties.to_excel("investment_20m_properties 28th May.xlsx", index=False)
-    investment_50m_properties.to_excel("investment_50m_properties 28th May.xlsx", index=False)
+    # Storing data
+    # investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False)

    # Store the EPC data
-    portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
-    portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
+    # portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th July.xlsx", index=False)

    # We check if any of these properties are in a conservation area
    valuations = pd.read_excel("property value.xlsx")
@ -529,6 +896,48 @@ def company_aggregation():
    aggregation.to_excel("Company ownership aggregation.xlsx")


+def extract_price_info(text):
+    """
+    Pull the low / estimated / high price figures out of scraped page text
+    :param text: page text expected to contain an "Estimated price" section quoting Low, mid and High figures in £k
+    :return: dict holding the three Zoopla figures in pounds, or None when the section isn't found
+    """
+    found = re.search(r'Estimated price\n\nLow£([\d,]+)k\n\n£([\d,]+)k\n\nHigh£([\d,]+)k', text)
+    if found is None:
+        return None
+
+    # Figures are quoted in thousands and may carry comma separators
+    low_price, est_price, high_price = (
+        int(figure.replace(',', '')) * 1000 for figure in found.groups()
+    )
+
+    return {
+        'Zoopla Valuation': est_price,
+        'Zoopla Lower Bound': low_price,
+        'Zoopla Upper Bound': high_price,
+    }
+
+
+def get_valuations(portfolio_epc_data_50m):
+    """
+    Scrape an estimated valuation for every property in the portfolio, keyed on UPRN
+    :param portfolio_epc_data_50m: dataframe with a "UPRN" column, one row per property
+    :return: dataframe with one row per UPRN for which a price could be extracted from the page
+    """
+    # This gets blocked pretty quickly by Zoopla
+    import requests
+    import time
+    from tqdm import tqdm
+    valuation_data = []
+    for _, property_data in tqdm(portfolio_epc_data_50m.iterrows(), total=len(portfolio_epc_data_50m)):
+        uprn = property_data["UPRN"]
+        response = requests.get(
+            f"https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/{uprn}/"
+        )
+
+        pricing = extract_price_info(response.text)
+        # extract_price_info returns None when the price section is missing from the page; unpacking
+        # None with ** would raise a TypeError and lose everything collected so far, so skip instead
+        if pricing is not None:
+            valuation_data.append(
+                {
+                    "UPRN": uprn,
+                    **pricing
+                }
+            )
+
+        # Pause between requests
+        time.sleep(2)
+
+    # Hand the collected valuations back to the caller rather than discarding them
+    return pd.DataFrame(valuation_data)
+
+
 def prepare_anonymised_data():
    investment_50m_properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0)
    investment_epc_data = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0)
@ -603,3 +1012,230 @@ def prepare_anonymised_data():
    )

    df.to_excel("Property List - 50% redacted.xlsx", index=False)
+
+
+def adhoc_change_of_portfolio_analysis_july_2024():
+    """
+    Ad hoc analysis answering questions that arose when the SFR portfolio was refreshed in late July 2024.
+
+    All inputs are Excel files read via relative paths. Question 1 prints its findings; question 2 builds
+    intermediate dataframes and uses asserts to check that every dropped unit is accounted for.
+    NOTE(review): only answer_q1 is invoked here; answer_q2 is defined but never called by this function.
+    :return: None
+    """
+
+    # Question 1: Which properties in the previous portfolio were in conservation areas or had listed/heritage status?
+    def answer_q1():
+        """
+        Print how many properties in geospatial_data.xlsx are in a conservation area, listed or heritage
+        """
+        # Data was just stored here:
+        geospatial_data = pd.read_excel("geospatial_data.xlsx")
+
+        # Keep rows flagged by any one of the three indicators
+        special_buildings = geospatial_data[
+            (geospatial_data["conservation_status"] == 1) |
+            geospatial_data["is_listed_building"] |
+            geospatial_data["is_heritage_building"]
+            ]
+
+        print(
+            f"There were {special_buildings.shape[0]} properties in the previous portfolio which were in conservation "
+            f"areas or had listed/heritage status"
+        )
+        print(f"{(special_buildings['conservation_status'] == 1).sum()} were in a conservation area")
+        print(f"{special_buildings['is_listed_building'].sum()} were listed buildings")
+        print(f"{special_buildings['is_heritage_building'].sum()} were heritage buildings")
+
+    answer_q1()
+
+    # Question 2: For each property in the old portfolio, why was it lost?
+    def answer_q2():
+        """
+        Work through the units present in the May portfolio but absent from the July one, and bucket each
+        by the reason it dropped out. The two asserts check that the buckets add up to the number of units.
+        """
+        # We read in the previous 50m portfolio
+        previous_portfolio = pd.read_excel("investment_50m_properties 28th May.xlsx")  # 39 owners
+
+        new_matched_addresses = pd.read_excel("combined_aggregate - pre filter 28th July.xlsx")
+        new_portfolio = pd.read_excel("investment_50m_properties 28th July.xlsx")  # 69 owners
+
+        # dropped units
+        dropped_units = previous_portfolio[
+            ~previous_portfolio["UPRN"].isin(new_portfolio["UPRN"].values)
+        ]
+        # Lots of properties are missed out - why
+        # 1) What was dropped, but was in the matched addresses and therefore was maybe filtered out
+        dropped_units_matched = dropped_units[
+            dropped_units["UPRN"].isin(new_matched_addresses["UPRN"])
+        ].copy()
+
+        # Bring across the land registry sale details and the two "recent sale" flags
+        dropped_units_matched = dropped_units_matched.merge(
+            new_matched_addresses[
+                ["UPRN", 'transaction_id', 'price', 'date_of_transfer', 'sold_recently', 'sale_lodged_recently']
+            ],
+            how="left", on="UPRN"
+        )
+
+        # 97 units here - how many were sold
+        of_which_sold = dropped_units_matched[
+            dropped_units_matched["sold_recently"]
+        ]
+        n_sold = of_which_sold.shape[0]
+        print(f"{n_sold} sold recently ({n_sold / previous_portfolio.shape[0] * 100})%")
+
+        of_which_have_sale_epc_but_not_sold = dropped_units_matched[
+            ~dropped_units_matched["sold_recently"] & dropped_units_matched["sale_lodged_recently"]
+            ]
+        n_with_sale_epc_but_not_yet_sold = of_which_have_sale_epc_but_not_sold.shape[0]
+        print(
+            f"{n_with_sale_epc_but_not_yet_sold} have a sale EPC but have not sold yet ("
+            f"{n_with_sale_epc_but_not_yet_sold / previous_portfolio.shape[0] * 100})%"
+        )
+
+        # What about things that haven't sold or don't look likely to sell
+        not_sold = dropped_units_matched[
+            ~dropped_units_matched["sold_recently"] & ~dropped_units_matched["sale_lodged_recently"]
+            ]
+
+        # Number of properties per owner, largest first, for the new and the previous portfolio
+        new_owner_sizes = new_portfolio.groupby(
+            ["Company Registration No. (1)"]
+        ).size().reset_index().rename(columns={0: "Number of Properties"})
+        new_owner_sizes = new_owner_sizes.sort_values("Number of Properties", ascending=False)
+
+        previous_owner_sizes = previous_portfolio.groupby(
+            ["Company Registration No. (1)"]
+        ).size().reset_index().rename(columns={0: "Number of Properties"})
+        previous_owner_sizes = previous_owner_sizes.sort_values("Number of Properties", ascending=False)
+
+        # Let's just confirm that we took in a bigger owner, as we see this unit was still matched
+        owner_too_small = []
+        owner_big_enough = []
+        for _, property in not_sold.iterrows():
+            owner_reg_id = property["Company Registration No. (1)"]
+            old_portfolio_owner_size = previous_owner_sizes[
+                previous_owner_sizes["Company Registration No. (1)"] == owner_reg_id
+                ]
+            # We make sure that the number of properties is smaller than the new smallest number
+            if (
+                old_portfolio_owner_size["Number of Properties"].values[0] >
+                new_owner_sizes["Number of Properties"].min()
+            ):
+                owner_big_enough.append(property.to_dict())
+                continue
+
+            owner_too_small.append(property.to_dict())
+
+        n_owner_too_small = len(owner_too_small)
+        owner_big_enough = pd.DataFrame(owner_big_enough)
+
+        # For each unit whose owner was big enough, record how many properties that owner has in the new
+        # matched addresses, before and after the recent-sale filtering
+        summary = []
+        for _, record in owner_big_enough.iterrows():
+            # Do we have this new owner?
+            new_owner = new_portfolio[
+                new_portfolio["Company Registration No. (1)"] == record["Company Registration No. (1)"]
+                ]
+            if new_owner.empty:
+                # Why don't we have this new owner
+                new_owner_data = new_matched_addresses[
+                    new_matched_addresses["Company Registration No. (1)"] == record["Company Registration No. (1)"]
+                    ]
+
+                new_owner_data_filtered = new_owner_data[
+                    ~new_owner_data["sold_recently"] & ~new_owner_data["sale_lodged_recently"]
+                    ]
+
+                summary.append(
+                    {
+                        "Owner Name": record["Proprietor Name (1)"],
+                        "Owner reg id": record["Company Registration No. (1)"],
+                        "N properties in new portfolio before filtering": new_owner_data.shape[0],
+                        "N properties in new portfolio after filtering": new_owner_data_filtered.shape[0],
+                    }
+
+                )
+                continue
+            # The owner is in the new portfolio even though this unit was dropped - not an expected case
+            raise Exception("something went wrong")
+
+        summary = pd.DataFrame(summary)
+
+        # Owners whose pre-filter property count is below the smallest owner size in the previous portfolio
+        not_accounted_for = summary[
+            (
+                summary["N properties in new portfolio before filtering"] <
+                previous_owner_sizes["Number of Properties"].min()
+            )
+        ]
+
+        # We have two owners not accounted for:
+        # ALLMID LIMITED, 01959058
+        # CORAL RACING LIMITED, 541600
+        # What happened to these owners?
+        new_epc = pd.read_excel("EPC F & G Properties - V2.xlsx")
+        allmid = previous_portfolio[previous_portfolio["Company Registration No. (1)"] == "01959058"].copy()
+        # Check if any of the properties are not in the new EPC data
+        allmid["not_in_new_epc"] = ~allmid["UPRN"].isin(new_epc["UPRN"])
+        allmid["not_in_matched_pre_filtered"] = ~allmid["UPRN"].isin(new_matched_addresses["UPRN"])
+        # In the previous portfolio, Allmid had 4 properties and in the re-build, it has just 2. Why?
+        # Firstly, one of their properties was re-surveyed not at an F/G
+        # Secondly, one of their properties is no longer owned by them:
+        # https://www.zoopla.co.uk/property/uprn/100070553074/
+        # So as an owner, they fell out of the ranking
+        coral_racing = previous_portfolio[previous_portfolio["Company Registration No. (1)"] == "541600"].copy()
+        coral_racing["not_in_new_epc"] = ~coral_racing["UPRN"].isin(new_epc["UPRN"])
+        coral_racing["not_in_matched_pre_filtered"] = ~coral_racing["UPRN"].isin(new_matched_addresses["UPRN"])
+        # Coral goes down from 4 -> 1 on refresh, so what happened?
+        # 1) 2 properties had new EPCs and re-scored higher
+        # 2) 1 property, 85A Market Street, Church Gresley, Swadlincote, DE11 9PN is no longer matched to the ownership
+        #    data, which is correct
+
+        # Why were these units lost?
+        # There's just 1 owner, who is BARHAM PROPERTY LTD
+        owner_too_big_ids = owner_big_enough["Company Registration No. (1)"].unique()
+        owner_too_big_names = owner_big_enough["Proprietor Name (1)"].unique()
+        previous_owner_size = previous_owner_sizes[
+            previous_owner_sizes["Company Registration No. (1)"].isin(owner_too_big_ids)
+        ]
+        new_owner_size = new_matched_addresses[
+            new_matched_addresses["Company Registration No. (1)"].isin(owner_too_big_ids) |
+            new_matched_addresses["Proprietor Name (1)"].isin(owner_too_big_names)
+            ]
+
+        # NOTE(review): this is the (rows, columns) shape tuple rather than a row count, and it is not used below
+        n_unsold = new_owner_size[~new_owner_size["sold_recently"] & ~new_owner_size["sale_lodged_recently"]].shape
+
+        # Happy with the justification to this point
+        assert (
+            (n_sold + n_with_sale_epc_but_not_yet_sold + n_owner_too_small + len(owner_big_enough)) ==
+            dropped_units_matched.shape[0]
+        )
+
+        # We now have a list of properties that were lost from the previous iteration to the next that were not matched
+        dropped_units_unmatched = dropped_units[
+            ~dropped_units["UPRN"].isin(new_matched_addresses["UPRN"])
+        ].copy()
+
+        # A few possibilities: They aren't in the EPC data?
+        # NOTE(review): this re-reads the same file already loaded into new_epc above
+        new_epc = pd.read_excel("EPC F & G Properties - V2.xlsx")
+        unmatched_not_in_epc = dropped_units_unmatched[
+            ~dropped_units_unmatched["UPRN"].isin(new_epc["UPRN"])
+        ]
+        # There are 17 units that have had new EPCs above a G
+        # Who were the owners? - various, nothing particularly remarkable
+        # (bare expression: the result is only visible when run interactively)
+        (
+            previous_portfolio[
+                previous_portfolio["UPRN"].isin(unmatched_not_in_epc["UPRN"])
+            ]["Proprietor Name (1)"].value_counts()
+        )
+
+        # 22 final units to be accounted for...!
+        unmatched_in_epc = dropped_units_unmatched[
+            dropped_units_unmatched["UPRN"].isin(new_epc["UPRN"])
+        ]
+
+        # Some of them will be due to ownership
+        # TODO: Read in freehold/leasehold data and see how many of these were non-exact matches!
+        leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")
+        freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
+        combined_matching_lookup = pd.concat([leasehold_matching_lookup, freehold_matching_lookup])
+        # This is 13 matches, all of them approximate
+        weak_matches = unmatched_in_epc.merge(combined_matching_lookup, how="inner", on="UPRN")
+
+        # These have been lost due to ownership updates. This has been checked manually for every unit and there has
+        # been sale activity for each one, justifying the change in ownership data
+        remaining_matches = unmatched_in_epc[
+            ~unmatched_in_epc["UPRN"].isin(weak_matches["UPRN"])
+        ]
+
+        # Final reconciliation of the dropped units against the buckets built above.
+        # NOTE(review): remaining_matches is not part of this sum - confirm that is intended
+        assert dropped_units.shape[0] == (
+            (n_sold + n_with_sale_epc_but_not_yet_sold + n_owner_too_small + len(owner_big_enough)) + len(
+            weak_matches) + unmatched_not_in_epc.shape[0]
+        )
--- a/etl/customers/newhaven/init.py
+++ b/etl/customers/newhaven/init.py
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@ -0,0 +1,378 @@
+import inspect
+import pandas as pd
+from etl.epc.settings import EARLIEST_EPC_DATE
+from pathlib import Path
+import numpy as np
+from utils.s3 import save_csv_to_s3
+
+# Locate this source file through the code object of a throwaway lambda rather than through __file__
+src_file_path = inspect.getfile(lambda: None)
+
+# The bulk EPC certificate extracts are expected under local_data/all-domestic-certificates next to this file
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+# NOTE(review): absolute path on a developer's machine - the customer datasets are read from here
+CUSTOMER_DATA_DIRECTORY = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data"
+
+# Identifiers used in the S3 keys of the uploaded inputs; PORTFOLIO_ID is also sent in the scenario bodies
+USER_ID = 8
+PORTFOLIO_ID = 90
+
+
+def make_asset_list():
+    """
+    Build the inputs for the Newhaven study and upload them to S3.
+
+    The function:
+      1) reads the Lewes EPC extract and keeps the newest certificate per UPRN;
+      2) reads the customer datasets (AddressBase, PV / ASHP / insulation potential and technology costs);
+      3) creates the final asset list: residential, non-caravan properties with an EPC score of 80 or below,
+         a known insulation wall area and an estimated number of floors;
+      4) creates non-invasive recommendations (air source heat pump and solar PV) for every listed property;
+      5) uploads the asset list, the recommendations and the manual patches to S3, and prints the request body
+         of each scenario to run.
+
+    Input files are read from EPC_DIRECTORY and CUSTOMER_DATA_DIRECTORY. Nothing is returned.
+    """
+    bucket_name = "retrofit-plan-inputs-dev"
+
+    def read_customer_csv(dataset_name):
+        # Every customer dataset is stored as "<dataset name>/<dataset name>.csv"
+        return pd.read_csv(
+            f"{CUSTOMER_DATA_DIRECTORY}/{dataset_name}/{dataset_name}.csv",
+            low_memory=False,
+        )
+
+    def read_customer_csv_with_uprn(dataset_name):
+        # UPRNs are converted to strings so that they can be joined across the datasets
+        dataset = read_customer_csv(dataset_name)
+        dataset["UPRN"] = dataset["UPRN"].astype(int).astype(str)
+        return dataset
+
+    # Read in EPC data for Lewes
+    epc_data = pd.read_csv(EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv", low_memory=False)
+    # Rename the columns to the same format as the api returns
+    epc_data.columns = [c.replace("_", "-").lower() for c in epc_data.columns]
+
+    # Keep only certificates lodged on or after the earliest date we accept
+    epc_data = epc_data[epc_data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+    epc_data = epc_data[~pd.isnull(epc_data["uprn"])].copy()
+    epc_data["uprn"] = epc_data["uprn"].astype(int).astype(str)
+    # Take the newest EPC per uprn. drop_duplicates keeps the whole newest row, whereas groupby(...).last()
+    # takes the last non-null value of every column and can therefore blend several certificates together
+    epc_data = epc_data.sort_values("lodgement-date").drop_duplicates("uprn", keep="last").reset_index(drop=True)
+
+    # We read in the multiple data sources
+    address_base = read_customer_csv("OS AddressBase Premium")
+    # Filter on resi (done before the UPRN conversion, as in the original processing order)
+    address_base = address_base[address_base["Primary Code Description"] == "Residential"].copy()
+    address_base["UPRN"] = address_base["UPRN"].astype(int).astype(str)
+
+    pv_potential = read_customer_csv_with_uprn("Domestic Rooftop PV Potential")
+    ashp_potential = read_customer_csv_with_uprn("Air Source Heat Pump Potential")
+    insulation_potential = read_customer_csv_with_uprn("Insulation Potential")
+    renewables_cost = read_customer_csv_with_uprn("Low Carbon Technology Costs")
+
+    # Merge the EPC data onto address base
+    asset_list = address_base[
+        [
+            "UPRN", "Class Description", "Relative Height - Eaves",
+        ]
+    ].merge(
+        epc_data[
+            ["uprn", "current-energy-efficiency", "current-energy-rating", "address1", "postcode", "floor-height",
+             "property-type", "built-form", "co2-emissions-current"]],
+        how="left",
+        left_on="UPRN",
+        right_on="uprn"
+    ).drop(
+        columns=["uprn"]
+    ).merge(
+        insulation_potential[["UPRN", "EPC Rating", "Wall Area [m^2]", "Building Area [m^2]"]],
+        how="left",
+        on="UPRN"
+    ).rename(
+        columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
+    )
+
+    # Take properties with an EPC score of 80 or below (i.e. below a B) - there are 2844 units.
+    # Properties without an EPC have a null score and are dropped by this comparison as well
+    asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
+    # Drop caravans
+    asset_list = asset_list[asset_list["Class Description"] != "Caravan"]
+    asset_list = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])].copy()
+
+    # Take a sample, for properties that have an EPC, with a seed
+    # asset_list = asset_list.sample(frac=0.5, random_state=42)
+
+    # Fallback floor height for properties whose EPC does not record one
+    median_floor_height = asset_list["floor-height"].median()
+
+    def estimate_n_floors(
+        building_height, floor_height, address_base_property_description, epc_property_type,
+    ):
+        """
+        Estimate the number of floors of a property.
+
+        Self-contained flats are taken from the EPC property type (flat = 1, maisonette = 2); None is returned
+        when the EPC disagrees with AddressBase about the property being a flat. Every other property is
+        estimated as building height / floor height, using the median floor height when the EPC has none.
+        """
+        if address_base_property_description == "Self Contained Flat (Includes Maisonette / Apartment)":
+            if epc_property_type in ["Flat"]:
+                return 1
+            if epc_property_type == "Maisonette":
+                return 2
+            return None
+
+        if pd.isnull(floor_height):
+            return np.round(building_height / median_floor_height)
+
+        return np.round(building_height / floor_height)
+
+    # Estimate the number of floors
+    asset_list["number_of_floors"] = asset_list.apply(
+        lambda x: estimate_n_floors(
+            building_height=x["Relative Height - Eaves"],
+            floor_height=x["floor-height"],
+            address_base_property_description=x["Class Description"],
+            epc_property_type=x["property-type"],
+        ),
+        axis=1
+    )
+    # Drop any entries with null floors because that means the ordnance survey data doesn't align with the epc data
+    asset_list = asset_list[~pd.isnull(asset_list["number_of_floors"])]
+    # Drop any entries with null insulation wall area
+    asset_list = asset_list[~pd.isnull(asset_list["insulation_wall_area"])]
+
+    # Recorded EPC rating distributions (NOTE(review): the first block is presumably from the 50% sample above)
+    # D    0.419929
+    # C    0.391459
+    # E    0.160142
+    # F    0.017794
+    # G    0.010676
+
+    # Total asset list:
+    # D    0.450409
+    # C    0.412016
+    # E    0.110203
+    # F    0.020263
+    # G    0.007110
+
+    # We do the following:
+    # 1) Create final asset list
+    # 2) Create Non-intrusive recommendations
+    # 3) Create a third party costing object
+
+    # Finding from the earlier per-m^2 cost checks on the customer's cost data:
+    # their cavity wall insulation is £8 per m^2
+
+    final_asset_list = asset_list.rename(
+        columns={"UPRN": "uprn", "address1": "address", "floor_area": "insulation_floor_area"}
+    )[["uprn", "address", "postcode", "insulation_wall_area", "insulation_floor_area", "number_of_floors"]]
+
+    def mean_cost_of_similar_properties(potential, suitability_column, size_column, size, cost_column):
+        # Average cost over the suitable properties with the same recommended size that do have cost data
+        similar_properties = potential[
+            potential[suitability_column] & (potential[size_column] == size)
+        ].merge(
+            renewables_cost, how="inner", on="UPRN"
+        )
+        return similar_properties[cost_column].mean()
+
+    # Create non-invasive recommendations, which come from the solar potential and ASHP potential data sources.
+    # The suitability columns are used directly as boolean masks
+    non_invasive_recommendations = []
+    for _, row in final_asset_list.iterrows():
+        uprn = row["uprn"]
+        property_ashp_potential = ashp_potential[
+            (ashp_potential["UPRN"] == uprn) & ashp_potential["Overall Suitability Rating"]
+        ]
+        property_pv_potential = pv_potential[
+            (pv_potential["UPRN"] == uprn) & pv_potential["Overall Suitability"]
+        ]
+        property_costs = renewables_cost[renewables_cost["UPRN"] == uprn]
+
+        property_non_invasive_recs = []
+        if property_ashp_potential.empty:
+            property_non_invasive_recs.append(
+                {
+                    "type": "air_source_heat_pump",
+                    "suitable": False
+                }
+            )
+        else:
+            heat_pump_size = property_ashp_potential["Recommended Heat Pump Size [kW]"].values[0]
+            if property_costs.empty:
+                # No cost data for this property: use the average for heat pumps of the same size
+                ashp_cost = mean_cost_of_similar_properties(
+                    potential=ashp_potential,
+                    suitability_column="Overall Suitability Rating",
+                    size_column="Recommended Heat Pump Size [kW]",
+                    size=heat_pump_size,
+                    cost_column="Air Source Heat Pump - Total",
+                )
+            else:
+                ashp_cost = property_costs["Air Source Heat Pump - Total"].values[0]
+
+            property_non_invasive_recs.append(
+                {
+                    "type": "air_source_heat_pump",
+                    "suitable": True,
+                    "size": heat_pump_size,
+                    "cost": ashp_cost,
+                    "ashp_only_heating_recommendation": True
+                }
+            )
+
+        if property_pv_potential.empty:
+            property_non_invasive_recs.append(
+                {
+                    "type": "solar_pv",
+                    "suitable": False
+                }
+            )
+        else:
+            array_size_kw = property_pv_potential["Recommended Array Size [kW]"].values[0]
+            if property_costs.empty:
+                # Same fallback as for heat pumps. Previously this case crashed, because the PV cost was read
+                # from a cost row that did not exist
+                pv_cost = mean_cost_of_similar_properties(
+                    potential=pv_potential,
+                    suitability_column="Overall Suitability",
+                    size_column="Recommended Array Size [kW]",
+                    size=array_size_kw,
+                    cost_column="Rooftop PV - Total",
+                )
+            else:
+                pv_cost = property_costs["Rooftop PV - Total"].values[0]
+
+            property_non_invasive_recs.append(
+                {
+                    "type": "solar_pv",
+                    "suitable": True,
+                    "array_wattage": array_size_kw * 1000,
+                    "initial_ac_kwh_per_year": property_pv_potential["Annual Generation [kWh]"].values[0],
+                    "panneled_roof_area": property_pv_potential["Roof area suitable for PV [m^2]"].values[0],
+                    "cost": pv_cost,
+                }
+            )
+
+        non_invasive_recommendations.append(
+            {
+                "uprn": uprn,
+                "recommendations": property_non_invasive_recs,
+            }
+        )
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=final_asset_list,
+        bucket_name=bucket_name,
+        file_name=filename
+    )
+
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name=bucket_name,
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # We add a patch to two of the units because there's no data for the built form
+    # We would be able to handle this automatically in the future, when using OS API
+    built_form_patches = [
+        {
+            "uprn": "10033266220",
+            "built-form": "Semi-Detached",
+        },
+        {'uprn': '10033266219', 'built-form': 'Semi-Detached'}
+    ]
+
+    # Store patches in s3
+    # NOTE(review): the key ends in .json but the content is written by the CSV helper - confirm the reader
+    # of this file expects that
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(built_form_patches),
+        bucket_name=bucket_name,
+        file_name=patches_filename
+    )
+
+    def build_scenario_body(scenario_name, exclusions=None):
+        # All scenarios share the same inputs and goal; they differ only in name and excluded measures.
+        # The "exclusions" key is left out entirely when nothing is excluded
+        body = {
+            "portfolio_id": str(PORTFOLIO_ID),
+            "housing_type": "Private",
+            "goal": "Increasing EPC",
+            "goal_value": "A",
+            "trigger_file_path": filename,
+            "already_installed_file_path": "",
+            "patches_file_path": patches_filename,
+            "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+            "scenario_name": scenario_name,
+            "multi_plan": True,
+        }
+        if exclusions is not None:
+            body["exclusions"] = exclusions
+        body["budget"] = None
+        return body
+
+    # Create the five scenarios, as (scenario name, excluded measures)
+    scenarios = [
+        (
+            "Demand Reduction - no solid wall, windows, LEDs",
+            [
+                "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv",
+                "lighting", "windows", "secondary_heating"
+            ],
+        ),
+        (
+            "Demand Reduction - no solid wall, floors or heating",
+            [
+                "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv",
+            ],
+        ),
+        # 2.5 - full fabric, no decant
+        # NOTE(review): this reuses the previous scenario's name although external wall insulation is allowed
+        # here - confirm whether the name should differ
+        (
+            "Demand Reduction - no solid wall, floors or heating",
+            [
+                "internal_wall_insulation", "floor_insulation", "heating", "solar_pv",
+            ],
+        ),
+        # Scenario B
+        (
+            "Demand Reduction, Heating Systems, Solar PV - no solid wall or floors",
+            ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
+        ),
+        # Scenario 4 - whole house, sent without any exclusions
+        ("Whole House", None),
+    ]
+    for scenario_name, exclusions in scenarios:
+        print(build_scenario_body(scenario_name, exclusions))
--- a/etl/customers/newhaven/slides.py
+++ b/etl/customers/newhaven/slides.py
@ -0,0 +1,417 @@
+from tqdm import tqdm
+import pandas as pd
+import numpy as np
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+
+
+def get_data(portfolio_id, scenario_ids):
+    """
+    Fetch the properties of a portfolio and the plans and default recommendations of the given scenarios.
+
+    :param portfolio_id: id of the portfolio whose properties are fetched
+    :param scenario_ids: ids of the scenarios whose plans and recommendations are fetched
+    :return: tuple (properties_data, plans_data, recommendations_data), each a list of dicts with one entry
+        per database column. Property rows combine the columns of PropertyModel and PropertyDetailsEpcModel;
+        where both tables have a column of the same name, the PropertyDetailsEpcModel value is kept.
+        Recommendation rows carry an additional "Scenario ID" entry.
+    """
+
+    def to_dict(instance, model):
+        # One entry per table column of the model
+        return {col.name: getattr(instance, col.name) for col in model.__table__.columns}
+
+    session = sessionmaker(bind=db_engine)()
+    # The session is closed in the finally block so that it is released even when a query fails
+    try:
+        session.begin()
+
+        # Get properties and their details for a specific portfolio
+        properties_query = session.query(
+            PropertyModel,
+            PropertyDetailsEpcModel
+        ).join(
+            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
+        ).filter(
+            PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
+        ).all()
+
+        # Transform properties data to include all fields dynamically
+        properties_data = [
+            {
+                **to_dict(prop.PropertyModel, PropertyModel),
+                **to_dict(prop.PropertyDetailsEpcModel, PropertyDetailsEpcModel),
+            }
+            for prop in properties_query
+        ]
+
+        # Get the plans of the requested scenarios
+        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+        plans_data = [to_dict(plan, Plan) for plan in plans_query]
+
+        # Extract plan IDs for filtering recommendations through PlanRecommendations
+        plan_ids = [plan['id'] for plan in plans_data]
+
+        # Get recommendations through PlanRecommendations for those plans and that are default
+        recommendations_query = session.query(
+            Recommendation,
+            Plan.scenario_id
+        ).join(
+            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
+        ).join(
+            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
+        ).filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            Recommendation.default == True  # noqa: E712 - builds a SQL expression, not a Python comparison
+        ).all()
+
+        # Every result row exposes the recommendation entity and the scenario id of its plan
+        recommendations_data = [
+            {**to_dict(rec.Recommendation, Recommendation), "Scenario ID": rec.scenario_id}
+            for rec in recommendations_query
+        ]
+    finally:
+        session.close()
+
+    return properties_data, plans_data, recommendations_data
+
+
+def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids):
+    """
+    Compare heating & hot water demand before and after retrofit, for each scenario.
+
+    Only default recommendations are used. Savings from low energy lighting and solar PV are tracked
+    separately: they count towards the total kWh saved, but do not reduce heating & hot water demand.
+
+    :param properties_df: one row per property, with "property_id" and "current_energy_demand_heating_hotwater"
+    :param recommendations_df: recommendations with "Scenario ID", "default", "type", "kwh_savings",
+        "property_id" and "estimated_cost"
+    :param scenario_ids: scenarios to evaluate
+    :return: tuple of three DataFrames with one row per scenario:
+        - average demand before/after retrofit over the whole population; properties without any
+          recommendation keep their current demand
+        - the same averages over the retrofitted properties only, plus "difference" and "percentage_reduction"
+        - "cost_per_kwh_saved": total estimated cost divided by total kWh saved (all measure types)
+    """
+    current_col = "current_energy_demand_heating_hotwater"
+    post_col = "Post Retrofit Heating & Hotwater kwh"
+    savings_col = "Estimated Kwh Savings"
+
+    unique_properties = properties_df.drop_duplicates()
+
+    population_rows = []
+    retrofitted_rows = []
+    cost_rows = []
+    for scenario_id in scenario_ids:
+        # Get the recommendations for the scenario, default
+        scenario_recommendations = recommendations_df[
+            (recommendations_df["Scenario ID"] == scenario_id) &
+            (recommendations_df["default"] == True)  # noqa: E712 - element-wise comparison
+        ].copy()
+
+        # Split the savings by what they affect; every recommendation contributes to exactly one column
+        rec_type = scenario_recommendations["type"]
+        kwh_savings = scenario_recommendations["kwh_savings"]
+        scenario_recommendations["lighting_kwh"] = kwh_savings.where(rec_type == "low_energy_lighting", 0)
+        scenario_recommendations["solar_kwh"] = kwh_savings.where(rec_type == "solar_pv", 0)
+        scenario_recommendations[savings_col] = kwh_savings.where(
+            ~rec_type.isin(["low_energy_lighting", "solar_pv"]), 0
+        )
+
+        grouped_data = scenario_recommendations.groupby(['property_id']).agg({
+            savings_col: 'sum',
+            'lighting_kwh': 'sum',
+            'solar_kwh': 'sum',
+            "estimated_cost": "sum"
+        }).reset_index()
+
+        comparison = unique_properties.merge(
+            grouped_data, on=["property_id"], how="left"
+        )
+
+        # Properties without recommendations have null savings after the left merge. They are not retrofitted,
+        # so their demand is unchanged; leaving the null in place would silently drop them from the
+        # population average of the post retrofit demand
+        comparison[post_col] = comparison[current_col] - comparison[savings_col].fillna(0)
+
+        avgs = comparison[[current_col, post_col]].mean()
+
+        # We now, for properties that have a plan, do a before and after
+        with_savings = comparison[~pd.isnull(comparison[savings_col])]
+
+        avgs2 = with_savings[[current_col, post_col]].mean()
+        avgs2["difference"] = avgs2[current_col] - avgs2[post_col]
+        avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2[current_col]
+
+        # We also calculate the cost per kwh saved
+        total_kwh_saved = (
+            with_savings[savings_col].sum() +
+            with_savings["lighting_kwh"].sum() +
+            with_savings["solar_kwh"].sum()
+        )
+        total_cost = with_savings["estimated_cost"].sum()
+        cost_per_kwh_saved = total_cost / total_kwh_saved
+
+        population_rows.append({"scenario_id": scenario_id, **avgs})
+        retrofitted_rows.append({"scenario_id": scenario_id, **avgs2})
+        cost_rows.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved})
+
+    scenario_comparison_population = pd.DataFrame(population_rows)
+    scenario_comparison_retrofitted_units = pd.DataFrame(retrofitted_rows)
+    cost_per_kwh_saved_table = pd.DataFrame(cost_rows)
+
+    return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table
+
+
+def slides():
+    """
+    Prepare the information required for the Newhaven slides.
+
+    Pulls the portfolio's properties, plans and default recommendations from the database and derives:
+    the baseline demand, the heating & hot water demand after retrofit per scenario, the share of properties
+    receiving each measure, a rough funding estimate for the most basic scenario, and the savings and cost
+    per measure type for the final scenario.
+
+    As side effects, it reads the ECO4 project scores matrix from a local CSV and widens the pandas display
+    options.
+
+    :return: dict with the computed figures and tables, keyed by name
+    :raises ValueError: if the portfolio does not hold exactly 2553 property rows
+    """
+    # Right now this is the second version of the Newhaven portfolio
+    portfolio_id = 90
+    scenario_ids = [47, 48, 49, 50, 51]
+    basic_scenario_id = 47  # most basic scenario: used for the funding estimate and the cavity wall check
+    ewi_scenario_id = 49  # scenario used to look at external wall insulation
+    final_scenario_id = 51
+
+    properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)
+
+    properties_df = pd.DataFrame(properties_data)
+    plans_df = pd.DataFrame(plans_data)
+    recommendations_df = pd.DataFrame(recommendations_data)
+
+    if properties_df.shape[0] != 2553:
+        raise ValueError("The number of unique properties is not 2553")
+
+    # Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline?
+    heating_hotwater_kwh = (
+        properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']]
+        .mean()
+    )
+
+    # Q2: For each scenario, what is the heating and hot water kwh after retrofit, on the entire
+    # population (incl those without retrofit) and for just those being retrofit
+    # We also calculate the cost per kwh saved
+    scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = (
+        estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids)
+    )
+
+    # Q3: For each scenario, how many properties receive each type of measure?
+    recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace(
+        {
+            "loft_insulation": "roof_insulation",
+            "room_roof_insulation": "roof_insulation",
+            "flat_roof_insulation": "roof_insulation",
+            "hot_water_tank_insulation": "other",
+            "cylinder_thermostat": "other",
+            "sealing_open_fireplace": "other",
+            "suspended_floor_insulation": "floor_insulation",
+            "solid_floor_insulation": "floor_insulation",
+        }
+    )
+
+    # Heat pumps are identified from the description. na=False keeps recommendations without a description
+    # from being counted as heat pumps (a null would otherwise be treated as a match by np.where)
+    recommendations_df["type_mapped"] = np.where(
+        recommendations_df["description"].str.contains("air source heat pump", na=False),
+        "air_source_heat_pump",
+        recommendations_df["type_mapped"]
+    )
+
+    # Group by scenario and mapped recommendation type and count the unique properties
+    recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby(  # noqa: E712
+        ['Scenario ID', 'type_mapped']
+    ).agg({
+        'property_id': 'nunique'
+    }).reset_index()
+
+    recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties']
+    recommendation_summary["Percentage of Properties"] = 100 * (
+        recommendation_summary["Number of Properties"] / properties_df["id"].nunique()
+    )
+
+    recommendation_summary_final_scenario = recommendation_summary[
+        recommendation_summary["Scenario ID"].isin([final_scenario_id])
+    ]
+
+    # MVP implementation of funding estimation for the most basic scenario, using GBIS
+
+    project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv")
+
+    eco4_scores_sap_table = pd.DataFrame([
+        {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0},
+        {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0},
+        {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5},
+        {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5},
+        {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25},
+        {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75},
+        {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75},
+        {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25},
+        {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25},
+        {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75},
+        {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75},
+        {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25},
+        {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25},
+        {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75}
+    ])
+
+    def find_band(value):
+        """Return the ECO4 band of a SAP score, or None when the score is null."""
+        # Scores are floored and then clamped to the range the table covers (1 to 100)
+        score = np.clip(
+            np.floor(value), eco4_scores_sap_table["From"].min(), eco4_scores_sap_table["Up to"].max()
+        )
+        matches = eco4_scores_sap_table[
+            (eco4_scores_sap_table["From"] <= score) & (eco4_scores_sap_table["Up to"] >= score)
+        ]
+        if matches.empty:
+            return None
+        # Neighbouring bands share a boundary at 86 and 96, where two rows match: the score belongs to the
+        # upper band, i.e. the one starting at that value
+        return matches.sort_values("From").iloc[-1]["Band"]
+
+    def find_abs(sap_movement, starting_sap, floor_area):
+        """Look up the "Cost Savings" score of moving a property between ECO4 bands; 0 when there is none."""
+        starting_band = find_band(starting_sap)
+        finishing_band = find_band(starting_sap + sap_movement)
+        if starting_band is None or finishing_band is None or starting_band == finishing_band:
+            return 0
+
+        if floor_area <= 72:
+            floor_area_segment = '0-72'
+        elif floor_area <= 97:
+            floor_area_segment = "73-97"
+        elif floor_area <= 199:
+            floor_area_segment = "98-199"
+        else:
+            floor_area_segment = "200+"
+
+        scores = project_scores_matrix[
+            (project_scores_matrix["Floor Area Segment"] == floor_area_segment) &
+            (project_scores_matrix["Starting Band"] == starting_band) &
+            (project_scores_matrix["Finishing Band"] == finishing_band)
+        ]
+        if scores.empty:
+            # The matrix has no entry for this movement, so nothing is scored
+            return 0
+        return scores.iloc[0]["Cost Savings"]
+
+    def identify_funding_measure(p, p_recs, is_social):
+        """Return the most expensive fundable measure (cavity wall or loft insulation) of a property, or None."""
+        measures = ["cavity_wall_insulation", "loft_insulation"]
+        property_abs = []
+        for m in measures:
+            funding_measure = p_recs[p_recs["type"] == m]
+            if funding_measure.empty:
+                continue
+            funding_measure = funding_measure.iloc[0]
+            project_abs = find_abs(
+                sap_movement=funding_measure["sap_points"],
+                starting_sap=p["current_sap_points"],
+                floor_area=p["total_floor_area"]
+            )
+            property_abs.append({
+                "property_id": p["property_id"],
+                "measure": funding_measure["type"],
+                "cost": funding_measure["estimated_cost"],
+                "abs": project_abs,
+                "is_social": is_social
+            })
+
+        if not property_abs:
+            return None
+
+        property_abs = pd.DataFrame(property_abs).sort_values("cost", ascending=False)
+        return property_abs.head(1).to_dict(orient="records")[0]
+
+    social_tenure = ["rental (social)", "Rented (social)"]
+    scenario_recs = recommendations_df[recommendations_df["Scenario ID"].isin([basic_scenario_id])]
+
+    funding = []
+    for _, p in tqdm(properties_df.iterrows(), total=len(properties_df)):
+        p_recs = scenario_recs[scenario_recs["property_id"] == p["property_id"]]
+        if p_recs.empty:
+            continue
+
+        # Only properties with fewer than 69 SAP points are considered (a null score is skipped as well)
+        if not (p["current_sap_points"] < 69):
+            continue
+
+        f = identify_funding_measure(p, p_recs, p["tenure"] in social_tenure)
+        if f:
+            funding.append(f)
+
+    # The columns are given explicitly so that the frame is usable when no property qualifies
+    funding = pd.DataFrame(funding, columns=["property_id", "measure", "cost", "abs", "is_social"])
+    # NOTE(review): presumably the £ of funding assumed per unit of score - confirm
+    conservative_abs = 20
+    funding["expected_funding"] = funding["abs"] * conservative_abs
+    # We take rows where the expected funding is at least the cost of the works + 15%
+    # (the factor applied is 1.15, although this step was originally described as "+ 10%")
+    funding = funding[funding["expected_funding"] >= (funding["cost"] * 1.15)]
+    is_social = funding["is_social"].astype(bool)
+
+    # From the owner of the properties, the funding that they see is just the cost of the works. The actual funding
+    # received will go to the installer
+    # We now look at the social funding
+    social_funding = funding[is_social]["cost"].sum()
+    # For the private funding, we need to scale this to consider the fact that only a proportion of the properties
+    # will qualify due to needing the property to fall into council tax bands A - D, and that only some of the tenants
+    # will meet the benefits criteria
+    private_funding = funding[~is_social]["cost"].sum()
+
+    # 51% of households are recipients of benefits in the South East, in the UK
+    # (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
+
+    # We also need to deduce the % of properties in council tax bands A - D
+    # 2023 council tax bands:
+    # https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2023/council-tax-stock-of-properties
+    # -statistical-commentary
+    band_a_proportion = 0.239
+    band_b_proportion = 0.195
+    band_c_proportion = 0.219
+    band_d_proportion = 0.156
+    a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
+
+    benefits_proportion = 0.51
+
+    # Note: It's probable that an occupant of a property in council tax bands A-D is more likely to be on benefits,
+    # however we retain the regional average to be conservative
+    # We scale the private funding based on these two factors
+    private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion
+
+    n_private_projects = np.round((~is_social).sum() * benefits_proportion * a_to_d_proportion)
+
+    # Look at the impact of EWI for the EWI scenario
+    ewi_jobs = recommendations_df[
+        (recommendations_df["Scenario ID"] == ewi_scenario_id) &
+        (recommendations_df["type"] == "external_wall_insulation")
+    ]
+    ewi_total_cost = ewi_jobs["estimated_cost"].sum()
+
+    # Properties that get cavity wall insulation in the most basic scenario
+    has_cavity = recommendations_df[
+        (recommendations_df["type"] == "cavity_wall_insulation") &
+        (recommendations_df["Scenario ID"] == basic_scenario_id)
+    ]
+    cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)]
+
+    # Recommendation type by kwh savings per m^2 of floor area
+    recommendations_final_scenario = recommendations_df[
+        recommendations_df["Scenario ID"].isin([final_scenario_id]) &
+        (recommendations_df["default"] == True)  # noqa: E712 - element-wise comparison
+    ].copy()
+    # Merge on floor area
+    recommendations_final_scenario = recommendations_final_scenario.merge(
+        properties_df[["property_id", "total_floor_area"]], on="property_id", how="left"
+    )
+    recommendations_final_scenario = recommendations_final_scenario[
+        ~pd.isnull(recommendations_final_scenario["total_floor_area"])
+    ].copy()
+    recommendations_final_scenario["kwh_savings_per_unit"] = (
+        recommendations_final_scenario["kwh_savings"] / recommendations_final_scenario["total_floor_area"]
+    )
+
+    # Alternative mapping that keeps loft insulation and heat pumps under their own type. It is built from
+    # the merged frame itself: the merge resets the index, so taking "type" from recommendations_df would
+    # attach the types to the wrong rows
+    recommendations_final_scenario["type_mapped2"] = recommendations_final_scenario["type"].replace(
+        {
+            "room_roof_insulation": "roof_insulation",
+            "flat_roof_insulation": "roof_insulation",
+            "hot_water_tank_insulation": "other",
+            "cylinder_thermostat": "other",
+            "sealing_open_fireplace": "other",
+            "suspended_floor_insulation": "floor_insulation",
+            "solid_floor_insulation": "floor_insulation",
+        }
+    )
+
+    aggs = recommendations_final_scenario.groupby("type_mapped")[
+        ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values(
+        "kwh_savings_per_unit", ascending=False
+    )
+    aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"]
+    # Show more columns with pandas
+    pd.set_option('display.max_columns', None)
+    # Show more rows with pandas
+    pd.set_option('display.max_rows', None)
+    # Show more characters in a column
+    pd.set_option('display.max_colwidth', None)
+
+    # Hand the results back so that they can be used outside of an interactive session
+    return {
+        "plans": plans_df,
+        "heating_hotwater_kwh": heating_hotwater_kwh,
+        "scenario_comparison_population": scenario_comparison_population,
+        "scenario_comparison_retrofitted_units": scenario_comparison_retrofitted_units,
+        "cost_per_kwh_saved_table": cost_per_kwh_saved_table,
+        "recommendation_summary": recommendation_summary,
+        "recommendation_summary_final_scenario": recommendation_summary_final_scenario,
+        "funding": funding,
+        "social_funding": social_funding,
+        "private_funding_scaled": private_funding_scaled,
+        "n_private_projects": n_private_projects,
+        "ewi_total_cost": ewi_total_cost,
+        "cavity_units": cavity_units,
+        "recommendations_final_scenario": recommendations_final_scenario,
+        "aggs": aggs,
+    }
--- a/etl/customers/orbit/archetypes.py
+++ b/etl/customers/orbit/archetypes.py
@ -0,0 +1,420 @@
+import pandas as pd
+import numpy as np
+from backend.SearchEpc import SearchEpc
+from dotenv import load_dotenv
+from tqdm import tqdm
+import os
+
+# Load environment variables from backend/.env (a relative path, so it is resolved against the working directory)
+load_dotenv(dotenv_path="backend/.env")
+# Token handed to SearchEpc as auth_token below; None when the variable is not set in the environment
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def clean_colnames(df):
+    """
+    Flatten a two-row spreadsheet header into a single row of column names.
+
+    The first row of ``df`` (by position) is treated as a secondary header. Where it holds a non-null, truthy
+    value the column is renamed to "<primary>+<secondary>"; otherwise the primary column name is kept.
+
+    :param df: DataFrame whose first row contains the secondary header values
+    :return: A new DataFrame without the secondary header row and with the combined column names
+    """
+    secondary_cols = ["" if pd.isnull(x) else x for x in df.iloc[0, :].values]
+    new_colnames = [
+        "+".join([primary, secondary]) if secondary else primary
+        for primary, secondary in zip(df.columns, secondary_cols)
+    ]
+    # The secondary header was read by position, so remove it by position as well. df.drop(0) removes the row
+    # *labelled* 0, which raises (or removes the wrong row) whenever the index does not start at 0.
+    # The copy keeps the result independent of the input, as df.drop did.
+    df = df.iloc[1:].copy()
+    df.columns = new_colnames
+    return df
+
+
+def lesney_farms():
+    """
+    Some rough and ready analysis to get a view of what the archetypes could be, ahead of a meeting with Wates
+    on the 28th Aug 2024.
+
+    Reads the Bexley Wave 3 workbook and the previously downloaded EPC data from a local directory, groups the
+    system built properties that are below EPC C into archetypes and writes the resulting assignment to
+    assigned_archetypes.csv in that same directory.
+
+    Several intermediate frames (and the bare expressions that reference them) are only there to be inspected
+    when stepping through the function interactively; they do not influence the CSV that is written.
+
+    :return: None
+    :raises Exception: if any asset cannot be matched to one of the location sheets
+    """
+
+    # All inputs and outputs live in this (machine specific) directory
+    data_dir = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates"
+    workbook_path = f"{data_dir}/Bexley Wave 3 Project - external - reduced.xlsx"
+
+    all_locations = [
+        "Forest Road Erith",
+        "Lesney Farms",
+        "Brook Street 155 - 243",
+        "Hazel Drive",
+        "Page Crescent",
+        "Brook Salmon Roberts and Chapma",
+        "Beacon Road"
+    ]
+
+    all_assets = pd.read_excel(
+        workbook_path,
+        sheet_name="Full Property List",
+        header=1
+    )
+    all_assets = clean_colnames(all_assets)
+    all_assets["Location"] = None
+
+    # One sheet per location; each lists the asset references that belong to it
+    locations = {
+        location_name: clean_colnames(pd.read_excel(
+            workbook_path,
+            sheet_name=location_name,
+            header=1
+        )) for location_name in all_locations
+    }
+
+    # Label every asset with the location sheet it appears in (a later sheet wins if an asset is on several)
+    for loc in all_locations:
+        all_assets["Location"] = np.where(
+            all_assets["Asset Reference"].isin(locations[loc]["Asset Reference"]),
+            loc,
+            all_assets["Location"]
+        )
+
+    if pd.isnull(all_assets["Location"]).sum():
+        raise Exception("something went wrong")
+
+    # 234 properties below EPC C
+    below_epc_c = all_assets[all_assets["PRE CALCULATED EPC"].isin(["D", "E", "F", "G"])].copy()
+
+    # We simplify wall type
+    below_epc_c["wall_type_simplified"] = below_epc_c["Wall Type"].str.split(" ").str[0]
+
+    known_no_epc = [
+        28679,  # There is no EPC for 11 Page Crescent, Erith, Kent, DA8 2HJ, just 11A
+        29291,  # No EPC for 225 Slade Green Road, Erith, Kent, DA8 2JW
+    ]
+    # Get the EPC data
+    # The commented-out block below is how the CSV read further down was originally produced
+    # epc_data = []
+    # for _, home in tqdm(all_assets.iterrows(), total=len(all_assets)):
+    #     if home["Asset Reference"] in known_no_epc:
+    #         continue
+    #
+    #     address = home["Address"]
+    #     # Spelling error
+    #     if "Frinstead" in address:
+    #         address = address.replace("Frinstead", "Frinsted")
+    #
+    #     address1 = address.split(",")[0]
+    #
+    #     asset_type_map = {
+    #         "HOUSE": "House",
+    #         "BUNGALOWS": "Bungalow",
+    #         "FLATS": "Flat",
+    #         "MAISONETTES": "Maisonette",
+    #     }
+    #
+    #     searcher = SearchEpc(
+    #         address1=address1,
+    #         postcode=home["Address - Postcode"],
+    #         auth_token=EPC_AUTH_TOKEN,
+    #         os_api_key="",
+    #         full_address=address,
+    #     )
+    #     searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
+    #     searcher.ordnance_survey_client.built_form = None
+    #
+    #     searcher.find_property(skip_os=True)
+    #     if searcher.newest_epc is None:
+    #         raise Exception("Couldn't find")
+    #
+    #     epc_data.append(
+    #         {
+    #             "Asset Reference": home["Asset Reference"],
+    #             **searcher.newest_epc.copy()
+    #         }
+    #     )
+    #
+    # epc_data = pd.DataFrame(epc_data)
+    epc_data = pd.read_csv(f"{data_dir}/Bexley EPC data.csv")
+    # epc_data.to_csv(
+    #     f"{data_dir}/Bexley EPC data.csv", index=False
+    # )
+
+    epc_comparison = all_assets[
+        ['Asset Reference', 'Address', 'PRE CALCULATED EPC']
+    ].merge(
+        epc_data[["Asset Reference", "current-energy-rating", "lodgement-date"]],
+        on='Asset Reference',
+        how="left"
+    )
+
+    # There are a large # of properties (147) that have different pre calculated EPC rating, to what's on the
+    # registry. These may be internally held EPRs but this may inform which properties we might want to
+    # prioritise for survey
+    different_epcs = epc_comparison[
+        epc_comparison["PRE CALCULATED EPC"] != epc_comparison["current-energy-rating"]
+        ]
+
+    not_c = different_epcs[
+        (different_epcs["PRE CALCULATED EPC"] == "C") &
+        (different_epcs["current-energy-rating"] != "C")
+        ]
+
+    system_builds = below_epc_c[
+        below_epc_c["Wall Type"].str.contains("SystemBuilt")
+    ].copy()
+
+    combinations = system_builds[
+        ['Asset Type', 'Property Type', 'Location', 'PRE CALCULATED EPC', 'Wall Type', ]
+    ].drop_duplicates()
+
+    system_build_data_comparison = system_builds.merge(
+        epc_data[
+            ["Asset Reference", "walls-description", "roof-description", "current-energy-rating", "lodgement-date",
+             "current-energy-efficiency"]],
+        left_on='Asset Reference',
+        right_on='Asset Reference',
+        how="left"
+    )
+
+    # Apply patches
+    patches = {
+        25847: {"Property Type": "Semi Detached House"},
+    }
+
+    for asset_ref, patch in patches.items():
+        for k, v in patch.items():
+            system_build_data_comparison.loc[
+                system_build_data_comparison["Asset Reference"] == asset_ref,
+                k
+            ] = v
+
+    # Candidate column sets for defining an archetype; the summary counts how many archetypes each one yields
+    archetype_columns = [
+        ["Asset Type", "Property Type", "Wall Type", "Location"],
+        ["Asset Type", "Property Type", "Location"],
+        ["Asset Type", "Property Type", "Wall Type", "Location", "PRE CALCULATED EPC", "roof-description"],
+        ["Asset Type", "Property Type", "Location", "PRE CALCULATED EPC"]
+    ]
+
+    summary = []
+    for cols in archetype_columns:
+        combinations = system_build_data_comparison[cols].drop_duplicates()
+        summary.append(
+            {
+                "cols": cols,
+                "number_archetypes": len(combinations),
+            }
+        )
+
+    summary = pd.DataFrame(summary)
+
+    # Let's use this column combination
+    chosen_combination = [
+        "Asset Type", "Property Type", "Wall Type", "Location", "PRE CALCULATED EPC", "roof-description"
+    ]
+
+    # For this combination, let's find the properties
+    archetype_combinations = system_build_data_comparison[chosen_combination].drop_duplicates().reset_index(drop=True)
+    archetype_combinations["archetype ID"] = archetype_combinations.index
+
+    archetyped_data = system_build_data_comparison.merge(
+        archetype_combinations, how="left", on=chosen_combination
+    )
+
+    counts = archetyped_data["archetype ID"].value_counts()
+    # Archetype 0: Semi D, As built system built, Pre calculated EPC D, flat insulated roof, (Lesney-0)
+    # Archetype 1: Semi D, Externally insulated system built, Pre calculated EPC D, flat insulated roof (Lesney-1)
+    # Archetype 4: Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated (Lesney-2)
+    # Archetype 3: Semi D, Externally insulated system built, Pre calculated EPC D, flat roof uninsulated (assumed) (
+    # Lesney-3)
+    # 0    21
+    # 1    11
+    # 4    11
+    # 3     3
+    # 2     1
+    # 5     1
+    # 6     1
+    # 7     1
+    # 8     1
+    # 9     1
+
+    # This archetype is the same as 0, apart from the pre calculated EPC being an E. The registry says this is a D
+    # This has been added to additional units
+    eg1 = archetyped_data[archetyped_data["archetype ID"] == 2]
+
+    # Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated
+    # This looks like it would fit either in archetype
+    eg2 = archetyped_data[archetyped_data["archetype ID"] == 5]
+
+    eg3 = archetyped_data[archetyped_data["archetype ID"] == 6]
+
+    # Archetypes 7, 8, 9 are all similar, Semi D, Uninsulated system built, with pitched lofts with up to 200mm
+    # insulation in the lofts
+
+    # It's just the three units
+    # They're all labelled as
+    # NOTE: this previously filtered on IDs [9, 10, 11]; the counts above only go up to 9 and the pitched roof
+    # units are archetypes 7, 8 and 9 (the same IDs used for lesney_4 below)
+    pitched_system_built_properties = archetyped_data[archetyped_data["archetype ID"].isin([7, 8, 9])]
+    pitched_system_built_properties["Address"]
+
+    notes = [
+        {
+            "Asset Reference": 27445,
+            "note": "Confirmed this has a pitched roof on Maps"
+        },
+        {
+            "Asset Reference": 27443,
+            "note": "Confirmed this has a pitched roof on Maps"
+        },
+        {
+            "Asset Reference": 27442,
+            "note": "Confirmed this has a pitched roof on Maps"
+        },
+        {
+            "Asset Reference": 25847,
+            "note": "This is labelled as a mid-terrace but the EPC data + Maps suggest it's a semi-detached"
+        }
+    ]
+
+    # These are As Built, System Built
+    # The street is taken as the second space separated token of the first address line
+    system_built_streets = (
+        archetyped_data["Address"].str.split(",").str[0].str.split(" ").str[1].unique()
+    )
+
+    all_assets_w_epcs = all_assets.merge(epc_data, on="Asset Reference", how="left")
+
+    # Grab all of the properties on this street that aren't system built
+    streets_not_system_builds = all_assets_w_epcs[
+        all_assets_w_epcs["Address"].str.split(",").str[0].str.split(" ").str[1].isin(system_built_streets) &
+        ~all_assets_w_epcs["Wall Type"].str.contains("SystemBuilt")
+        ]
+
+    system_builds = archetyped_data[
+        archetyped_data["Wall Type"].str.contains("SystemBuilt")
+    ][["Asset Reference", "Address", "Wall Type", "walls-description"]].sort_values("Address")
+
+    birling_street_system_builds = system_builds[system_builds["Address"].str.contains("Birling")]
+    halstead_street_system_builds = system_builds[system_builds["Address"].str.contains("Halstead")]
+    brasted_street_system_builds = system_builds[system_builds["Address"].str.contains("Brasted")]
+    frinstead_street_system_builds = system_builds[
+        system_builds["Address"].str.contains("Frinstead") | system_builds["Address"].str.contains("Frinsted")
+        ]
+
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+    streets_not_system_builds[["Asset Reference", "Address", "Wall Type", "walls-description"]]
+
+    system_builds[system_builds["Address"].str.contains("Birling")]
+
+    # Possible System Builds
+
+    # Create the proposed sample
+    # lesney-0
+    archetyped_data["lodgement-date"] = pd.to_datetime(archetyped_data["lodgement-date"])
+
+    lesney_0 = archetyped_data[archetyped_data["archetype ID"] == 0].copy()
+    # Get the oldest EPC per postcode
+    lesney_0 = lesney_0.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_0[["Address", "Address - Postcode", "lodgement-date"]]
+
+    lesney_1 = archetyped_data[archetyped_data["archetype ID"] == 1].copy()
+    lesney_1 = lesney_1.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_1[["Address", "Address - Postcode", "lodgement-date"]]
+
+    lesney_2 = archetyped_data[archetyped_data["archetype ID"] == 4].copy()
+    lesney_2 = lesney_2.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_2[["Address", "Address - Postcode", "lodgement-date"]]
+
+    lesney_3 = archetyped_data[archetyped_data["archetype ID"] == 3].copy()
+    lesney_3 = lesney_3.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_3[["Address", "Address - Postcode", "lodgement-date", "roof-description"]]
+
+    # Get the pitched roof properties, which are lesney-4
+    lesney_4 = archetyped_data[archetyped_data["archetype ID"].isin([7, 8, 9])].copy()
+    lesney_4 = lesney_4.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_4[["Address", "Address - Postcode", "lodgement-date", "roof-description"]]
+
+    assigned_archetypes = archetyped_data[
+        ["Asset Reference", "archetype ID", "Address", "Address - Postcode"] + chosen_combination +
+        ["lodgement-date", "current-energy-rating", "current-energy-efficiency", "walls-description"]
+        ].copy()
+    # Map the archetype ID to their string representation
+    assigned_archetypes["archetype ID"] = assigned_archetypes["archetype ID"].replace(
+        {
+            0: "Lesney-0",
+            1: "Lesney-1",
+            4: "Lesney-2",
+            3: "Lesney-3",
+            7: "Lesney-4",
+            8: "Lesney-4",
+            9: "Lesney-4",
+            2: "Lesney-0",
+            5: "Lesney-2",
+            6: "Lesney-0",
+        }
+    )
+
+    assigned_archetypes["Asset Reference"] = assigned_archetypes["Asset Reference"].astype(int)
+
+    assigned_archetypes.to_csv(
+        f"{data_dir}/assigned_archetypes.csv", index=False
+    )
+
+
+def culworth_court():
+    """
+    Some rough works on Culworth Court
+
+    They're looking at an ASHP/GSHP
+
+    Loads the asset list, fetches the newest EPC for every asset via SearchEpc and derives the distinct
+    archetype combinations. Nothing is returned or written to disk.
+
+    :return: None
+    :raises Exception: if no EPC can be found for an asset
+    :raises KeyError: if an asset has an "Asset Type" that is missing from the asset type map
+    """
+
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/001 - EPC CULWORTH COURT.xlsx",
+        sheet_name="EPC C",
+        header=1
+    )
+    asset_list = clean_colnames(asset_list)
+
+    # Translates the asset list's "Asset Type" values into the property type set on the Ordnance Survey client.
+    # It never changes, so it is built once rather than on every iteration
+    asset_type_map = {
+        "HOUSE": "House",
+        "BUNGALOWS": "Bungalow",
+        "FLATS": "Flat",
+        "MAISONETTES": "Maisonette",
+    }
+
+    # Get the EPC data
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        address = home["Address"]
+        # Spelling error
+        if "Frinstead" in address:
+            address = address.replace("Frinstead", "Frinsted")
+
+        # The first comma separated part of the address is used as address line 1
+        address1 = address.split(",")[0]
+
+        searcher = SearchEpc(
+            address1=address1,
+            postcode=home["Address - Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            full_address=address,
+        )
+        searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            raise Exception("Couldn't find")
+
+        epc_data.append(
+            {
+                "Asset Reference": home["Asset Reference"],
+                **searcher.newest_epc.copy()
+            }
+        )
+    epc_data = pd.DataFrame(epc_data)
+
+    asset_list = asset_list.merge(epc_data, on="Asset Reference", how="left")
+    # Treat the "NODATA!" placeholder as an empty floor level
+    asset_list["floor-level"] = np.where(
+        asset_list["floor-level"] == "NODATA!",
+        "",
+        asset_list["floor-level"]
+    )
+
+    # Fold "Enclosed End-Terrace" into "End-Terrace" so they count as one built form
+    asset_list["built-form"] = np.where(
+        asset_list["built-form"] == "Enclosed End-Terrace",
+        "End-Terrace",
+        asset_list["built-form"]
+    )
+
+    archetype_combinations = asset_list[
+        ["Asset Type", "Property Type", "built-form", "floor-level"]
+    ].drop_duplicates()
+
+    # A leftover debug filter for "Enclosed End-Terrace" used to follow here. It could only ever be empty,
+    # because those values are replaced above, so it has been removed.
--- a/etl/customers/orbit/funding_example_portfolio.py
+++ b/etl/customers/orbit/funding_example_portfolio.py
@ -0,0 +1,141 @@
+import pandas as pd
+
+from utils.s3 import save_csv_to_s3
+
+# User and portfolio identifiers: together they form the S3 key prefix "<USER_ID>/<PORTFOLIO_ID>/" used in app(),
+# and PORTFOLIO_ID is also sent (as a string) in the "portfolio_id" field of each request body
+USER_ID = 8
+PORTFOLIO_ID = 100
+
+
+def app():
+    """
+    Set up a small asset list to model the impact of the following scenarios:
+    1) EWI
+    2) EWI + Solar
+    3) EWI + Solar + ASHP
+
+    The asset list and a solar PV recommendation per property are uploaded to S3, after which the request body
+    for each of the three scenarios is printed.
+
+    :return: None
+    """
+    bucket = "retrofit-plan-inputs-dev"
+
+    asset_list = pd.DataFrame(
+        [
+            # This is an example of a low D - SAP score is 60
+            ("37, Birling Road", "DA8 3JQ", 100020225444),
+            ("16, Brasted Road", "DA8 3HU", 100020225805),
+            ("25, Birling Road", "DA8 3JQ", 100020225432),
+            ("4, Halstead Road", "DA8 3HX", 100020229555),
+        ],
+        columns=["address", "postcode", "uprn"],
+    )
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(dataframe=asset_list, bucket_name=bucket, file_name=filename)
+
+    # Every property gets the same solar recommendation (a fresh dict per property)
+    non_invasive_recs = [
+        {
+            "uprn": asset["uprn"],
+            "recommendations": [
+                {
+                    "type": "solar_pv",
+                    "suitable": True,
+                    "array_wattage": 4000,
+                    "initial_ac_kwh_per_year": 3800,
+                    "cost": 4009,
+                    # Rough estimate for 10 panels, around 1m x 1.8m (accommodate gaps and 30cm edge)
+                    "panneled_roof_area": 20,
+                }
+            ],
+        }
+        for _, asset in asset_list.iterrows()
+    ]
+
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recs),
+        bucket_name=bucket,
+        file_name=non_invasive_recommendations_filename,
+    )
+
+    def scenario_body(scenario_name, recommendations_path, exclusions):
+        # The scenarios only differ in their name, recommendations file and excluded measures
+        return {
+            "portfolio_id": str(PORTFOLIO_ID),
+            "housing_type": "Private",
+            "goal": "Increasing EPC",
+            "goal_value": "A",
+            "trigger_file_path": filename,
+            "already_installed_file_path": "",
+            "patches_file_path": "",
+            "non_invasive_recommendations_file_path": recommendations_path,
+            "scenario_name": scenario_name,
+            "multi_plan": True,
+            "exclusions": exclusions,
+            "budget": None,
+        }
+
+    # EWI only: no recommendations file, and solar PV is excluded along with everything else
+    body1 = scenario_body(
+        "ECO4 funding - EWI",
+        "",
+        [
+            "internal_wall_insulation",
+            "roof_insulation", "ventilation", "floor_insulation", "windows", "fireplace", "heating", "hot_water",
+            "lighting", "secondary_heating", "solar_pv"
+        ],
+    )
+    print(body1)
+
+    # EWI + Solar: heating upgrades remain excluded
+    body2 = scenario_body(
+        "ECO4 funding - EWI + Solar",
+        non_invasive_recommendations_filename,
+        [
+            "internal_wall_insulation",
+            "roof_insulation",
+            "ventilation",
+            "floor_insulation",
+            "windows",
+            "fireplace",
+            "heating",
+            "hot_water",
+            "lighting",
+            "secondary_heating",
+            "boiler_upgrade",
+            "high_heat_retention_storage_heater",
+        ],
+    )
+    print(body2)
+
+    # EWI + Solar + ASHP: "heating" is no longer excluded
+    body3 = scenario_body(
+        "ECO4 funding - EWI + Solar + ASHP",
+        non_invasive_recommendations_filename,
+        [
+            "internal_wall_insulation",
+            "roof_insulation", "ventilation", "floor_insulation", "windows", "fireplace", "hot_water",
+            "lighting", "secondary_heating",
+        ],
+    )
+    print(body3)
--- a/etl/customers/vectis/outputs.py
+++ b/etl/customers/vectis/outputs.py
@ -0,0 +1,196 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+
+def app():
+    """
+    Upload the asset list and the survey based recommendations for portfolio 101 to S3 and print the request
+    bodies of the two scenarios that are modelled on top of them.
+
+    :return: None
+    """
+    portfolio_id = 101
+    bucket = "retrofit-plan-inputs-dev"
+    key_prefix = f"8/{portfolio_id}"
+
+    # This is the payload to be used to extract the energy assessment data from s3 and upload it to the database,
+    # as well as produce links to each of the uploaded documents.
+    body = {
+        "portfolio_id": portfolio_id,
+        "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
+        "project_code": "VEC001",
+    }
+
+    def mixed_glazing(sap_points):
+        # The glazing recommendation is identical for every flat apart from its SAP points
+        return {
+            "type": "mixed_glazing", "cost": 12345, "survey": True,
+            "description": "Install double glazing to north facing windows and secondary glazing to the "
+                           "remaining windows at the front of the building",
+            "sap_points": sap_points
+        }
+
+    def trickle_vents():
+        # Trickle vents carry no SAP points
+        return {"type": "trickle_vents", "cost": 500, "survey": True}
+
+    # These are the recommendations based on the on-site survey of the property.
+    non_intrusive_recommendations = [
+        {
+            # 2 Grove Mansions
+            "uprn": 121016121,
+            "recommendations": [
+                {"type": "draught_proofing", "cost": 123, "survey": True, "sap_points": 1},
+                mixed_glazing(3),
+                trickle_vents(),
+                {"type": "suspended_floor_insulation", "cost": None, "survey": True, "sap_points": 2},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 5},
+            ]
+        },
+        {
+            # 8 Grove Mansions
+            "uprn": 10024087855,
+            "recommendations": [
+                {"type": "draught_proofing", "cost": 123, "survey": True, "sap_points": 2},
+                mixed_glazing(4),
+                trickle_vents(),
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 0},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 5},
+            ]
+        },
+        {
+            # 9 Grove Mansions
+            "uprn": 121016128,
+            "recommendations": [
+                {"type": "draught_proofing", "cost": 123, "survey": True, "sap_points": 1},
+                mixed_glazing(3),
+                trickle_vents(),
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 1},
+                # NOTE(review): unlike every other entry this one has no "survey" flag - confirm it's intentional
+                {"type": "suspended_floor_insulation", "cost": None, "sap_points": 1},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 6},
+            ]
+        },
+        {
+            # 5 Grove Mansions
+            "uprn": 121016124,
+            "recommendations": [
+                mixed_glazing(5),
+                trickle_vents(),
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 2},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 8},
+            ]
+        },
+        {
+            # 14 Grove Mansions
+            "uprn": 121016117,
+            "recommendations": [
+                {"type": "draught_proofing", "cost": 123, "survey": True, "sap_points": 1},
+                mixed_glazing(4),
+                trickle_vents(),
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 1},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 6},
+            ]
+        },
+        {
+            # 19 Grove Mansions
+            "uprn": 10024087902,
+            "recommendations": [
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 0},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 2},
+                {"type": "room_roof_insulation", "cost": None, "survey": True, "sap_points": 16},
+            ]
+        },
+    ]
+
+    # The asset list holds one row per surveyed property, in the same order, with no address or postcode
+    asset_list = pd.DataFrame(
+        [
+            {"uprn": surveyed["uprn"], "address": "", "postcode": ""}
+            for surveyed in non_intrusive_recommendations
+        ]
+    )
+
+    filename = f"{key_prefix}/asset_list.csv"
+    save_csv_to_s3(dataframe=asset_list, bucket_name=bucket, file_name=filename)
+
+    # Store non-invasive recommendations in S3
+    # NOTE(review): the key ends in .json but is written with save_csv_to_s3 - confirm what the consumer expects
+    non_invasive_recommendations_filename = f"{key_prefix}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_intrusive_recommendations),
+        bucket_name=bucket,
+        file_name=non_invasive_recommendations_filename,
+    )
+
+    def scenario_body(scenario_name, inclusions):
+        # Both scenarios share everything apart from their name and the measures they include
+        return {
+            "portfolio_id": str(portfolio_id),
+            "housing_type": "Private",
+            "goal": "Increasing EPC",
+            "goal_value": "A",
+            "trigger_file_path": filename,
+            "already_installed_file_path": "",
+            "patches_file_path": "",
+            "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+            "inclusions": inclusions,
+            "budget": None,
+            "scenario_name": scenario_name,
+            "multi_plan": True,
+        }
+
+    quick_wins = ["draught_proofing", "mixed_glazing", "trickle_vents", "low_energy_lighting"]
+
+    # This is the first scenario which includes the first batch of recommendations
+    body1 = scenario_body("Quick wins - do now while tenanted", list(quick_wins))
+
+    # This is the second scenario which includes the second batch of recommendations
+    body2 = scenario_body(
+        "Do when void",
+        quick_wins + ["suspended_floor_insulation", "internal_wall_insulation"],
+    )
+
+    print(body1)
+    print(body2)
--- a/etl/energy_efficiency/app.py
+++ b/etl/energy_efficiency/app.py
@ -0,0 +1,90 @@
+import inspect
+import pandas as pd
+from tqdm import tqdm
+from pathlib import Path
+
+# Path of this source file, obtained by asking inspect where the throwaway lambda was defined
+src_file_path = inspect.getfile(lambda: None)
+
+# Directory next to this file that app() scans; each sub-directory is expected to hold a certificates.csv
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    For EPCs lodged from 2020 onwards, collect data on the energy efficiency categories for wall insulation so
+    that when we simulate, we know what the resulting energy efficiency category will be.
+
+    Every sub-directory of EPC_DIRECTORY is read in turn. The insulated wall records are aggregated into the
+    most common energy efficiency category per construction age band; air source heat pump records are
+    aggregated alongside.
+
+    :return: None
+    """
+    wall_descriptions = [
+        "Cavity wall, filled cavity",
+        "Solid brick, with internal insulation",
+        "Solid brick, with external insulation",
+    ]
+    date_cutoff = "2020-01-01"
+
+    def recent_with_uprn(records):
+        # Keep the records that have a UPRN and were lodged on or after the cutoff
+        records = records[records["uprn"].notnull()]
+        return records[pd.to_datetime(records["lodgement-date"]) >= date_cutoff]
+
+    walls_data = []
+    ashp_data = []
+    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+    for directory in tqdm(epc_directories, total=len(epc_directories)):
+        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        # Rename the columns to the same format as the api returns
+        data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
+        insulated_walls = data[data["walls-description"].isin(wall_descriptions)]
+        walls_data.append(recent_with_uprn(insulated_walls))
+
+        ashp = data[data["mainheat-description"] == "Air source heat pump, radiators, electric"]
+        ashp_data.append(recent_with_uprn(ashp))
+
+    walls_df = pd.concat(walls_data)
+    ashp_df = pd.concat(ashp_data)
+
+    ashp_group_columns = [
+        "construction-age-band", "mainheat-description", "mainheatcont-description", "mainheat-energy-eff",
+        "mainheatc-energy-eff"
+    ]
+    ashp_agg = ashp_df.groupby(ashp_group_columns).size().reset_index()
+    ashp_agg = ashp_agg[
+        ashp_agg["mainheatcont-description"].isin(
+            ["Programmer, TRVs and bypass", "Time and temperature zone control"]
+        )
+    ]
+
+    aggregations = {}
+    for description in wall_descriptions:
+        matching_walls = walls_df[walls_df["walls-description"] == description]
+        aggregation = (
+            matching_walls
+            .groupby(["construction-age-band", "walls-energy-eff"])
+            .size()
+            .reset_index(name="count")
+        )
+
+        # For each grouping of age band, we use the most populous energy efficiency category
+        aggregations[description] = (
+            aggregation
+            .sort_values("count", ascending=False)
+            .drop_duplicates("construction-age-band")
+        )
+
+    # Since these tables are small, we just convert them to python dictionaries
+    # This data is just held in the wall_energy_efficiency_values script, rather than s3
+    # NOTE(review): the converted records are not stored or returned - presumably they are copied out by hand
+    for description in wall_descriptions:
+        aggregations[description].to_dict("records")
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
--- a/etl/ownership/README.md
+++ b/etl/ownership/README.md
@ -0,0 +1,10 @@
+# Ownership Application
+
+This application contains methods that allow us to attempt to discover
+corporate ownership of properties, where possible.
+
+Practically, it's likely that the code within this application will be
+exported into other areas of this repository, and used to assemble
+pipelines that solve specific property ownership questions, and so this
+codebase is set up with the goal of providing fairly easy to use, plug
+and play tools.
--- a/etl/ownership/config.py
+++ b/etl/ownership/config.py
@ -0,0 +1,35 @@
+# These are the registration numbers for companies we've heard a response from, and cannot sell
+OWNERS_WHO_CANT_SELL = [
+    # Al Rayan - they're the senior lender, not able to sell
+    "4483430",
+    # Ultrabarn - they're unwilling to sell and will sort any retrofits themselves
+    "2794851",
+    # Mountview - Anna spoke with someone from Mountview - they acquire tenancies and sell them as soon as they become
+    # vacant. They have no immediate opportunities but we may come back and remove this
+    "328090",
+]
+
+EXCLUDED_UPRNS = [
+    # This property no longer exists
+    200003827624,
+    # This property doesn't seem to exist
+    90070698,
+    # Can't really find a solid record on Zoopla/Rightmove
+    10090437990,
+    # This property doesn't seem to exist
+    100070902790,
+    # This property doesn't seem to exist
+    100070902791,
+    # This property doesn't seem to exist
+    100031997775,
+    # Can't find reliable information to this property on zoopla/rightmove
+    200001372608,
+    # Can't find reliable information to this property on zoopla/rightmove
+    100031592801,
+    # Can't find reliable information to this property on zoopla/rightmove
+    100031579087,
+    # Can't find reliable information to this property on zoopla/rightmove
+    200000877273,
+    # Can't find reliable information to this property on zoopla/rightmove - seems like a post office!
+    100071391639
+]
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@ -0,0 +1,181 @@
+import datetime
+
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.models.portfolio import Portfolio, PortfolioUsers
+from etl.ownership.Ownership import Ownership
+from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS, EXCLUDED_UPRNS
+from utils.s3 import save_csv_to_s3
+
+# Set up the project configuration
+USER_IDS = [
+    2,  # Khalim
+    3,  # Chenai
+    5,  # Anna
+    30,  # Patricia
+]
+
+EPC_PATHS = [
+    "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
+    #
+    "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
+    # East midlands
+    "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
+]
+
+DOMESTIC_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv"
+OVERSEAS_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv"
+LAND_REGISTRY_PATH = "/Users/khalimconn-kowlessar/Downloads/pp-complete.csv"
+
+PROJECT_NAME = "Midlands Portfolio"
+DATA_BUCKET = "retrofit-data-dev"
+
+# We use this as a rough figure, which helps us shape the portfolio
+PROPERTY_VALUE_ESTIMATE = 200_000
+# We want a 50m portfolio, but we create a bigger portfolio than needed, since properties will be filtered out
+PORTFOLIO_VALUE = 75_000_000
+
+
+def create_sfr_portfolio(project_name, user_ids, status, goal):
+    """
+    Fetch the portfolio called `project_name`, creating it (and its user associations) when it does not exist yet.
+
+    :param project_name: Portfolio name used for the lookup and, if needed, for the new record
+    :param user_ids: Iterable of user ids to associate with a newly created portfolio; also compared against the
+        users already linked to an existing portfolio
+    :param status: Status stored on a newly created portfolio
+    :param goal: Goal stored on a newly created portfolio
+    :return: The existing portfolio when at least one of `user_ids` is already linked to it, None when a portfolio
+        with this name exists but is linked to none of `user_ids`, otherwise the newly created portfolio
+    :raises Exception: Any error raised while talking to the database is printed, rolled back and re-raised
+    """
+    session = sessionmaker(bind=db_engine)()
+    try:
+        session.begin()
+
+        # Check for an existing portfolio by name
+        portfolio = session.query(Portfolio).filter_by(name=project_name).one_or_none()
+
+        if portfolio:
+            # Fetch the associated users
+            existing_user_ids = {
+                pu.user_id for pu in session.query(PortfolioUsers.user_id).filter_by(portfolioId=portfolio.id)
+            }
+
+            # Check if the specified user_ids match any existing associations
+            if existing_user_ids.intersection(set(user_ids)):
+                print("Portfolio already exists under this name, for specified users.")
+            else:
+                print("Portfolio already exists under this name, for different users.")
+                session.rollback()  # No changes to be committed
+                return None  # Optional: You could also update the user associations here if needed
+
+            return portfolio  # Return the existing portfolio data
+
+        # If portfolio does not exist, create a new one with the provided status and goal
+        new_portfolio = Portfolio(name=project_name, status=status, goal=goal)
+        session.add(new_portfolio)
+        session.flush()  # Ensures that 'id' is available before committing if needed
+
+        # Create new user associations in PortfolioUsers
+        for user_id in user_ids:
+            new_association = PortfolioUsers(user_id=user_id, portfolioId=new_portfolio.id)  # corrected attribute name
+            session.add(new_association)
+
+        session.commit()
+        print(f"New portfolio created with ID: {new_portfolio.id}")
+        return new_portfolio
+
+    except Exception as e:
+        session.rollback()  # Ensure no partial changes are committed
+        print(f"An error occurred: {e}")
+        raise
+
+    finally:
+        # NOTE(review): the returned ORM object outlives this session - confirm callers only read attributes that
+        # are already loaded
+        session.close()
+
+
+def app():
+    """
+    Build the Midlands portfolio: run the ownership pipeline over the configured EPC files (restricted to F and G
+    rated certificates), upload the resulting asset list to S3, print the request body that references it, and
+    finally ask the ownership instance to create its final outputs.
+
+    The portfolio id, user id and portfolio timestamp are hard-coded below.
+    :return: None
+    """
+    epc_column_filters = {
+        "CURRENT_ENERGY_RATING": ["F", "G"]
+    }
+
+    ownership_instance = Ownership(
+        epc_paths=EPC_PATHS,
+        domestic_ownership_path=DOMESTIC_OWNERSHIP_PATH,
+        overseas_ownership_path=OVERSEAS_OWNERSHIP_PATH,
+        land_registry_path=LAND_REGISTRY_PATH,
+        project_name=PROJECT_NAME,
+        bucket=DATA_BUCKET,
+        average_property_value=PROPERTY_VALUE_ESTIMATE,
+        portfolio_value=PORTFOLIO_VALUE,
+        excluded_owners=EXCLUDED_OWNERS,
+        excluded_uprns=EXCLUDED_UPRNS
+    )
+    ownership_instance.pipeline(column_filters=epc_column_filters)
+
+    # Create the project, if a portfolio doesn't exist for the project name
+
+    # Create the asset list and the body of the portfolio
+    asset_list = ownership_instance.get_asset_list()
+
+    # Create the portfolio
+    # TODO: Wasn't working
+    # create_sfr_portfolio(project_name=PROJECT_NAME, user_ids=USER_IDS, status="scoping", goal="Increasing EPC")
+
+    # Hard-coded ids used in place of the portfolio that create_sfr_portfolio would have returned
+    portfolio_id = 99
+    user_id = 8
+
+    filename = f"{user_id}/{portfolio_id}/asset_list.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # The body is only printed here; nothing in this function sends it anywhere
+    body = {
+        "portfolio_id": str(portfolio_id),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "scenario_name": "Hit EPC C",
+        "multi_plan": True,
+        "exclusions": ["fireplace", "floor_insulation"],
+        "budget": None,
+    }
+    print(body)
+
+    # # We read in the current valuation data and identify if there are any uprns that need to be added
+    # previous_valuations = pd.read_excel(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
+    # missed = asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
+    # missed.to_csv("missed_valuations.csv")
+
+    # We now need a distinct step to prepare final outputs
+    # NOTE(review): presumably the creation timestamp of the portfolio run to finalise - confirm before re-running
+    portfolio_timestamp = "2024-08-20 19:51:33.884145"
+
+    # Create a date in the yyyy-mm-dd format to store the data against
+    storage_date = datetime.datetime.now().strftime("%Y-%m-%d")
+
+    ownership_instance.create_final_outputs(
+        portfolio_timestamp=portfolio_timestamp, storage_date=storage_date, exclusion_uprns=EXCLUDED_UPRNS
+    )
--- a/etl/spatial/OpenUprnClient.py
+++ b/etl/spatial/OpenUprnClient.py
@ -3,7 +3,8 @@ from tqdm import tqdm
 import pandas as pd
 import geopandas as gpd
 from utils.logger import setup_logger
-from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet
+from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from backend.Property import Property

 logger = setup_logger()

@ -116,3 +117,81 @@ class OpenUprnClient:
            file_key=file_key,
            bucket_name=bucket_name
        )
+
+    @staticmethod
+    def make_uprn_map(uprns, uprn_filenames):
+        """
+        Given a list of UPRNs, this method will return a map of the UPRN to the filename that the UPRN is contained in
+        :param uprns: List of UPRNs
+        :param uprn_filenames: Lookup from UPRN range to filename
+        :return:
+        """
+        uprn_map = {}
+        for uprn in uprns:
+            filtered_df = uprn_filenames[
+                (uprn_filenames["lower"] <= int(uprn))
+                & (uprn_filenames["upper"] >= int(uprn))
+                ]
+            if filtered_df["filenames"].values[0] in uprn_map:
+                uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
+            else:
+                uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
+
+        return uprn_map
+
+    @classmethod
+    def set_spatial_data(cls, input_properties: list[Property], bucket_name):
+        """
+        Given a list of properties, this method will set the spatial data for each property
+        The method will look for the minimal set of uprn datasets that it needs to read in to get all of the spatial
+        data for the properties
+        """
+
+        uprn_filenames = read_dataframe_from_s3_parquet(
+            bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
+        )
+
+        uprns = [p.uprn for p in input_properties]
+        uprn_map = cls.make_uprn_map(uprns, uprn_filenames)
+
+        for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
+            # Read in the file
+            spatial_data = read_dataframe_from_s3_parquet(
+                bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+            )
+
+            spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
+            for p in input_properties:
+                if p.uprn in associated_uprn:
+                    p.set_spatial(spatial_df[spatial_df["UPRN"] == p.uprn])
+
+        # Perform a final check to ensure that all properties have spatial data
+        for p in input_properties:
+            if p.spatial is None:
+                raise Exception(f"Property with UPRN {p.uprn} does not have spatial data")
+
+        return input_properties
+
+    @classmethod
+    def get_spatial_data(cls, uprns: list[int], bucket_name):
+        """
+        Similar method to set_spatial_data, but designed to work more generally on a list of uprns
+        :return:
+        """
+        uprn_filenames = read_dataframe_from_s3_parquet(
+            bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
+        )
+
+        uprn_map = cls.make_uprn_map(uprns, uprn_filenames)
+
+        uprn_spatial_table = []
+        for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
+            # Read in the file
+            spatial_data = read_dataframe_from_s3_parquet(
+                bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+            )
+
+            spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
+            uprn_spatial_table.append(spatial_df)
+
+        return pd.concat(uprn_spatial_table)
--- a/etl/testing_data/bills_model_testing.py
+++ b/etl/testing_data/bills_model_testing.py
@ -0,0 +1,287 @@
+# We use some sample properties from Newhaven to use as a testing dataset for implementing the model fixes
+
+
+import inspect
+import pandas as pd
+from etl.epc.settings import EARLIEST_EPC_DATE
+from pathlib import Path
+from utils.s3 import save_csv_to_s3
+
+src_file_path = inspect.getfile(lambda: None)
+
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+
+USER_ID = 8
+PORTFOLIO_ID = -1
+
+
+def app():
+    """
+    Build a small sample asset list from the Lewes EPC certificates and upload it to S3 as a testing dataset.
+
+    The local Lewes certificates file is filtered to records lodged on or after EARLIEST_EPC_DATE that have a uprn
+    and a current energy efficiency below 52. Ten of those records are sampled at random, their uprn, address and
+    postcode are saved to the plan inputs bucket, and the request body that references the saved file is printed.
+    :return: None
+    """
+
+    lewes_directory = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
+
+    data = pd.read_csv(lewes_directory, low_memory=False)
+    # Rename the columns to the same format as the api returns
+    data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
+    # Take just the data lodged on or after the date threshold
+    data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+    data = data[~pd.isnull(data["uprn"])]
+    data = data[data["current-energy-efficiency"].astype(float) < 52]
+    # Random sample, so the uploaded asset list differs between runs
+    data = data.sample(10)
+
+    # Create an asset list
+    asset_list = data[["uprn", "address1", "postcode"]].copy().rename(columns={"address1": "address"})
+    asset_list["uprn"] = asset_list["uprn"].astype(str)
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # The body is only printed here; nothing in this function sends it anywhere
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
+
+
+# This is some temp code, which is for diagnosing the issues with the bills models
+heating_training_data_filepath = "sap_change_model/2024-08-06-11-19-49/dataset_rooms.parquet"
+
+# For the heating model:
+heating_drop_columns = [
+    "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
+    "lighting_cost_ending", "hot_water_cost_ending",
+    # "days_to_ending", "days_to_starting",  # TODO This is in the live version
+    'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
+    'number_heated_rooms_ending',
+    'number_habitable_rooms', 'number_heated_rooms'
+]
+
+heating_response = "heating_cost_ending"
+
+# for the hot water model (older dataset)
+hot_water_training_data_filepath = "sap_change_model/2024-07-10-20-28-54/dataset_rooms.parquet"
+
+hot_water_drop_columns = [
+    "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
+    "lighting_cost_ending", "heating_cost_ending",
+    "days_to_starting", "days_to_ending",
+    'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
+    'number_heated_rooms_ending',
+    'number_habitable_rooms', 'number_heated_rooms'
+]
+
+# Diagnose heating
+from utils.s3 import read_dataframe_from_s3_parquet
+
+train = read_dataframe_from_s3_parquet(
+    bucket_name="retrofit-data-dev",
+    file_key=heating_training_data_filepath
+)
+
+# Drop the columns that aren't used
+train = train.drop(columns=heating_drop_columns)
+
+# if the value is positive, it means the ending cost is bigger than the starting (which means it got more expensive)
+train["cost_diference"] = (train["heating_cost_ending"] - train["heating_cost_starting"])
+change_direction = train["cost_diference"] > 0
+change_direction.value_counts(normalize=True)
+
+average_costs_by_time_starting = train.groupby(
+    ["lodgement_year_starting", "lodgement_month_starting"]
+)["heating_cost_starting"].mean().reset_index().sort_values(["lodgement_year_starting", "lodgement_month_starting"])
+
+average_costs_by_time_ending = train.groupby(
+    ["lodgement_year_ending", "lodgement_month_ending"]
+)["heating_cost_ending"].mean().reset_index().sort_values(["lodgement_year_ending", "lodgement_month_ending"])
+
+# Check by photo supply values - if the property is gas, solar panels won't have an effect on the heating or hot
+# water so let's look for electric homes
+# Across the entire dataset, there is no correlation
+# Even for electric properties, there is no correlation
+photo_supply_averages = train[
+    train["fuel_type_ending"] == "electricity"
+    ].groupby(["photo_supply_ending"])["heating_cost_ending"].mean().reset_index()
+
+photo_supply_to_size = train.groupby("photo_supply_ending")["total_floor_area_ending"].mean().reset_index()
+photo_supply_to_size[["photo_supply_ending", "total_floor_area_ending"]].corr()
+train[["total_floor_area_ending", "heating_cost_ending"]].corr()
+# Bigger properties end up with smaller photo_supply values. This will be because the array size likely remains fairly
+# consistent but takes up a smaller proportion of the roof. Typically, the bigger the floor area, the higher the heating
+# costs, but bigger units also have smaller photo_supply
+adding_solar = train[
+    (train["photo_supply_ending"] > 0) & (train["photo_supply_starting"] == 0)
+    ]
+is_positive = (adding_solar["cost_diference"] > 0)
+is_positive.value_counts(normalize=True)
+
+photo_supply_by_time = (
+    train[
+        train["fuel_type_ending"] == "electricity"
+        ].groupby(
+        ["lodgement_year_ending", "photo_supply_ending"]
+    )["heating_cost_ending"].mean().reset_index().sort_values(
+        ["lodgement_year_ending", "photo_supply_ending"], ascending=True)
+)
+# Plot
+photo_supply_by_time[["photo_supply_ending", "heating_cost_ending"]].corr()
+photo_supply_by_time.plot()
+
+# Observations
+# 1) We retain all of the potential columns, however they are just based on the starting EPC
+# 2) 21% of the time, the ending heating cost is more than the starting but this is clearly a minority
+# 3) Let's get rid of estimated perimeter starting and ending
+
+# Things I should check
+# 1) Do we update the lodgement_year_ending and lodgement_month_ending
+# 2) Should we adjust costs to now, as well as lodgement_dates to today? Since 2023, costs have increased a lot so
+#    any savings should be benchmarked against what a customer is paying now
+# 3) It might make sense to create a feature between floor area and photo supply, to give a more consistent estimate
+#    of a panel size for the property
+
+# Get an example and score with the models
+example = train[
+    (train["photo_supply_starting"] == 0) &
+    (train["photo_supply_ending"] > 0) &
+    (train["heating_cost_starting"] > train["heating_cost_ending"])
+    ].sample(1)
+
+# example["lodgement_month_starting"]
+# example["lodgement_year_starting"]
+# example["lodgement_month_ending"]
+# example["lodgement_year_ending"].values[0]
+#
+# example["lodgement_year_ending"] = 2023
+# example["days_to_ending"] = 3500
+# example["days_to_starting"]
+
+# {'heating_cost_predictions':    predictions
+# 0        378.5}
+resp = model_api.predict_all(
+    df=example,
+    bucket="retrofit-data-dev",
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_cost_predictions"],
+    extract_ids=False
+)
+
+# Step 1: get a cost for today
+p.create_base_difference_epc_record(cleaned)
+cwi_impact = p.base_difference_record.df.copy()
+for k in property_recommendations[0][0]["simulation_config"]:
+    cwi_impact[k] = property_recommendations[0][0]["simulation_config"][k]
+
+# 2212.4 - Baseline
+today = model_api.predict_all(
+    df=p.base_difference_record.df.copy(),
+    bucket="retrofit-data-dev",
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_cost_predictions"],
+    extract_ids=False
+)
+
+# impact of CWI - 1908
+cwi_response = model_api.predict_all(
+    df=cwi_impact,
+    bucket="retrofit-data-dev",
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_cost_predictions"],
+    extract_ids=False
+)
+
+pv_impact = cwi_impact.copy()
+pv_impact["photo_supply_ending"] = 50
+pv_impact["heating_cost_starting"] = 2212.4
+
+pv_response = model_api.predict_all(
+    df=pv_impact,
+    bucket="retrofit-data-dev",
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_cost_predictions"],
+    extract_ids=False
+)
+
+# Testing kwh for vde
+base_prediction = model_api.predict_all(
+    df=epcs_for_scoring,
+    bucket=get_settings().DATA_BUCKET,
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_kwh_predictions"],
+    extract_ids=False
+)
+
+cwi_epc = pd.DataFrame([property_scoring_epcs[1].copy()])
+cwi_epc = add_features_from_code(cwi_epc)
+cwi_epc = add_estimate_annual_kwh(cwi_epc)
+# cwi_epc["walls-description"] = "Cavity wall, filled cavity"
+# cwi_epc["walls-energy-eff"] = "Good"
+# cwi_epc["heating-cost-current"] = 1650
+# cwi_epc["current-energy-efficiency"] = 72
+# cwi_epc["current-energy-rating"] = "C"
+# cwi_epc["co2-emissions-current"] = 3.7
+# cwi_epc["energy-consumption-current"] = 121
+# cwi_epc["co2-emiss-curr-per-floor-area"] = 19
+# cwi_epc["photo-supply"] = 0
+# cwi_epc["energy-consumption-current"] =
+# cwi_epc["roof-description"] = "Pitched, 300 mm loft insulation"
+# cwi_epc["roof-energy-eff"] = "Very Good"
+# cwi_epc["heating-cost-current"] = 1264
+
+# "heating-cost-current": rec_impact["epc_heating_cost"],
+#                     "hot-water-cost-current": rec_impact["epc_hot_water_cost"],
+#                     # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
+#                     # per year, we multiply by 1000 to get kg/m²
+#                     "co2-emiss-curr-per-floor-area": round(
+#                         1000 * (rec_impact["carbon"] / self.data["total-floor-area"])
+#                     ),
+#                     "co2-emissions-current": rec_impact["carbon"],
+#                     "current-energy-rating": sap_to_epc(rec_impact["sap"]),
+#                     "current-energy-efficiency": int(np.floor(rec_impact["sap"])),
+#                     "energy-consumption-current": rec_impact["heat_demand"],
+#                     "lighting-cost-current": rec_impact["epc_lighting_cost"],
+#                     "id": "+".join([str(self.id), rec_id])
+
+cwi_prediction = model_api.predict_all(
+    df=cwi_epc,
+    bucket=get_settings().DATA_BUCKET,
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
+    extract_ids=False
+)
+
+# 77 perryn
+starting_heating = 19837.2
+starting_hot_water = 2974.1
+
+ending_heating = 17041.1
+ending_hot_water = 2735.3
+
+# 44 lindlings
+starting_heating = 13327.1
+starting_hot_water = 2349.5
+
+ending_heating = 9672.3
+ending_hot_water = 2030.2
+
+ending_heating = 8695.1
+ending_hot_water = 2437.0
+
+heating_impact = starting_heating - ending_heating
+hot_water_impact = starting_hot_water - ending_hot_water
+total_impact = heating_impact + hot_water_impact
--- a/etl/webscrape/Zoopla.py
+++ b/etl/webscrape/Zoopla.py
@ -0,0 +1,38 @@
+# Initial Code
+
+from seleniumbase import SB
+import time
+
+# UPRNs to look up on Zoopla
+# NOTE(review): the same five UPRNs are listed twice, so each page is scraped twice - confirm whether intended
+uprns = [
+    100071297618,
+    100080893397,
+    100060778033,
+    200004793081,
+    100071265143,
+    100071297618,
+    100080893397,
+    100060778033,
+    200004793081,
+    100071265143,
+]
+
+# Collects one estimate string per scraped UPRN, in the same order as `uprns`
+estimate_list = []
+
+for uprn in uprns:
+
+    # Probably can change the timings here
+    time.sleep(5)
+    # A fresh browser session is opened (and closed) for every UPRN
+    with SB(uc=True) as sb:
+        sb.uc_open_with_reconnect(
+            f"https://www.zoopla.co.uk/property/uprn/{uprn}/",
+            3,
+        )
+
+        soup = sb.get_beautiful_soup()
+
+        estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
+        # Can change the way we extract the text here
+        # The text is taken from the aria-label of the last span, in the last paragraph, of the last estimate block
+        # NOTE(review): a page without a sale-estimate block raises an IndexError here and stops the whole loop
+        estimate_text = (
+            estimates[-1].find_all("p")[-1].find_all("span")[-1]["aria-label"]
+        )
+        estimate_list.append(estimate_text)
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@ -0,0 +1,796 @@
+import re
+import numpy as np
+import usaddress
+from datetime import datetime
+from xml.dom.minidom import parseString
+from backend.app.utils import sap_to_epc
+from etl.xml_survey_extraction.pcdb import heating_data
+
+PROPERTY_TYPE_LOOKUP = {
+    "0": "House",
+    "House": "House",
+    "2": "Flat"
+}
+
+
+def get_house_number(address: str) -> str | None:
+    """
+    This method will use the usaddress library to parse an address and extract the house number
+    :return:
+    """
+
+    parsed = usaddress.parse(address)
+    parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
+    parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
+
+    if parsed_house_number is None:
+        # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
+        # we also add a custom approach
+
+        # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
+        pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
+
+        match = re.search(pattern, address)
+
+        if match:
+            # Return the first non-None group found
+            return next(g for g in match.groups() if g is not None)
+        else:
+            return None
+
+    # Remove training commas
+    parsed_house_number = parsed_house_number.replace(",", "")
+
+    return parsed_house_number
+
+
+class XmlParser:
+    epc = {}
+    additional_data = {}
+    uprn = None
+
+    # heating/emissions information
+    space_heating_kwh = None
+    water_heating_kwh = None
+    heating_system = None
+    heating_controls = None
+
+    # Assessor details
+    surveyor_name = None
+
+    number_of_doors = None
+    number_of_insulated_doors = None
+    windows = None
+
+    # Property dimensions
+    number_of_floors = None
+    perimeter = None
+    heat_loss_perimeter = None
+    party_wall_length = None
+    total_floor_area = None
+    floor_height = None
+    insulation_wall_area = None
+
+    floor_dimensions = None
+
+    # The age band lookup is based on the country code
+    AGE_BAND_LOOKUP = {
+        # England & Wales
+        "EAW": {
+            "A": "England and Wales: before 1900",
+            "B": "England and Wales: 1900-1929",
+            "C": "England and Wales: 1930-1949",
+            "D": "England and Wales: 1950-1966",
+            "E": "England and Wales: 1967-1975",
+            "F": "England and Wales: 1976-1982",
+            "G": "England and Wales: 1983-1990",
+            "H": "England and Wales: 1991-1995",
+            "I": "England and Wales: 1996-2002",
+            "J": "England and Wales: 2003-2006",
+            "K": "England and Wales: 2007-2011",
+            "L": "England and Wales: 2012 onwards",
+        }
+    }
+
+    RATINGS_MAP = {
+        "0": "N/A",
+        "1": "Very Poor",
+        "2": "Poor",
+        "3": "Average",
+        "4": "Good",
+        "5": "Very Good"
+    }
+
+    MECHANICAL_VENTILATION_MAP = {
+        "0": "natural"
+    }
+
+    BUILT_FORM_MAP = {
+        "1": "Detached",
+        "3": "End-Terrace",
+        "4": "Mid-Terrace",
+    }
+
+    GLAZED_AREA_MAP = {
+        "4": "Much More Than Typical"
+    }
+
+    FUEL_TYPE_MAP = {
+        "26": "mains gas (not community)"
+    }
+
+    TRANSACTION_TYPE_MAP = {
+        "13": "ECO assessment"
+    }
+
+    TENURE_MAP = {
+        "1": "Owner-occupied",
+        "2": "Rented (social)",
+        "3": "Rented (private)",
+    }
+
+    TARIFF_MAP = {
+        "1": "Dual",
+        "2": "Single"
+    }
+
+    def __init__(self, file, filekey, surveyor_company, uprn=None):
+        """
+        Read and parse the survey xml, and work out which kind of xml it is
+        :param file: Binary file-like object holding the xml; it is rewound, read in full and decoded as utf-8
+        :param filekey: Stored on the instance as given
+        :param surveyor_company: Stored on the instance as given
+        :param uprn: Optional uprn, passed straight to get_uprn
+        """
+        file.seek(0)  # Ensure the file pointer is at the beginning
+        xml_string = file.read().decode('utf-8')
+        self.xml = parseString(xml_string)
+        self.filekey = filekey
+        self.surveyor_company = surveyor_company
+
+        # We check if we have a lig xml or rdsap xml
+        # We look for the presence of the Schema-Version-Original tag
+        self.is_lig = len(self.xml.getElementsByTagName("Schema-Version-Original")) > 0
+
+        self.get_uprn(uprn)
+
+    @staticmethod
+    def get_node(node):
+        """
+        Utility function to get the node value from the xml, where data might be optional
+        :return:
+        """
+
+        node_first_child = node.firstChild
+        if node_first_child is None:
+            return None
+
+        return node_first_child.nodeValue
+
+    def run(self):
+        """
+        Run every extraction step in order. Nothing is extracted unless the xml is a lig xml
+        :return: None
+        """
+
+        # Only lig xmls are processed here; anything else is left untouched
+        if not self.is_lig:
+            return
+
+        self.get_assessor_details()
+
+        self.get_heating_and_emissions_data()
+
+        # self.get_detailed_heating_specs()
+
+        # Building fabric
+        self.get_doors()
+
+        self.get_floor_dimensions()
+
+        self.get_windows()
+
+        # Get all of the EPC data
+        # extract_epc raises a ValueError unless the floor dimensions and windows have been set beforehand
+        self.extract_epc()
+
+        # Put together all of the additional data we capture
+        self.extract_additional_data()
+
+    def _parse_heat_loss_corridor(self):
+        """
+        Translate the heat loss corridor value of a flat into its EPC description.
+
+        :return: "unheated corridor" for the values "2" and "Unheated"
+        :raises KeyError: for any other value, including a missing tag
+        """
+        # For some reason, the rdsap xml spells this tag incorrectly ('FlatCoridor')
+        tag_name = 'Heat-Loss-Corridor' if self.is_lig else 'FlatCoridor'
+        raw_value = self.get_node_value(tag_name)
+        descriptions = {"2": "unheated corridor", "Unheated": "unheated corridor"}
+        return descriptions[raw_value]
+
+    def _parse_heat_loss_corridor_length(self):
+        """
+        Read the unheated corridor length; the tag name depends on the xml flavour.
+
+        :return: text of 'Unheated-Corridor-Length' (lig) or 'FlatShelteredWallLength' (rdsap), None if absent
+        """
+        tag_name = 'Unheated-Corridor-Length' if self.is_lig else 'FlatShelteredWallLength'
+        return self.get_node_value(tag_name)
+
+    def _parse_flat_storey_count(self):
+        """
+        Read the storey count of a flat.
+
+        :return: None for a lig xml; otherwise the text of the 'Storeys' tag (None if absent)
+        """
+        if self.is_lig:
+            return None
+        # in the EPR the tag is Storeys
+        return self.get_node_value('Storeys')
+
+    def _parse_flat_top_storey(self):
+        """
+        Read the 'Top-Storey' tag, which is only looked up for a lig xml.
+
+        :return: the tag text for a lig xml (None if absent); always None otherwise
+        """
+        return self.get_node_value('Top-Storey') if self.is_lig else None
+
+    def _parse_floor_level(self):
+        """
+        Read the 'Level' tag nested in the first 'SAP-Flat-Details' tag (lig xml only).
+
+        :return: the level text for a lig xml; None otherwise
+        """
+        if not self.is_lig:
+            return None
+
+        flat_details = self.xml.getElementsByTagName('SAP-Flat-Details')[0]
+        level_node = flat_details.getElementsByTagName("Level")[0]
+        return level_node.firstChild.nodeValue
+
+    def extract_epc(self):
+        """
+        Build ``self.epc``, a dict of EPC-style fields (hyphenated keys) read from the XML.
+
+        Requires ``self.floor_dimensions`` and ``self.windows`` to have been populated first. Coded values are
+        translated through the class-level lookup maps, so a code missing from a map raises KeyError.
+
+        :raises ValueError: when ``self.floor_dimensions`` or ``self.windows`` is None
+        """
+
+        if self.floor_dimensions is None:
+            raise ValueError("Run get_floor_dimensions() first")
+
+        if self.windows is None:
+            raise ValueError("Run get_windows() first")
+
+        property_type = self.get_property_type()
+
+        # The corridor/storey fields are only read for flats; everything else gets placeholder values
+        if property_type == "Flat":
+            heat_loss_corridor = self._parse_heat_loss_corridor()
+            unheated_corridor_length = self._parse_heat_loss_corridor_length()
+            flat_storey_count = self._parse_flat_storey_count()
+            flat_top_storey = self._parse_flat_top_storey()
+            floor_level = self._parse_floor_level()
+
+        else:
+            heat_loss_corridor = "NO DATA!"
+            unheated_corridor_length = ""
+            flat_storey_count = ""
+            flat_top_storey = ""
+            floor_level = "NO DATA!"
+
+        # Average room height over the main dwelling's floors, leaving out any room in roof
+        # NOTE(review): if no floor matches, this is the mean of an empty list - confirm that cannot happen
+        floor_height = np.mean([
+            float(x['room_height']) for x in self.floor_dimensions if
+            x['building_part_identifier'] == 'Main Dwelling' and not x['room_roof']
+        ])
+
+        # Take the most prevalent glazing type among the windows with location '0'
+        # NOTE(review): max() raises ValueError when there are no such windows
+        glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
+        glazed_type = max(glazed_type, key=glazed_type.count)
+
+        # The meter type code is nested inside the first SAP-Energy-Source tag
+        energy_tariff = (
+            self.xml.getElementsByTagName("SAP-Energy-Source")[0]
+            .getElementsByTagName("Meter-Type")[0]
+            .firstChild.nodeValue
+        )
+        energy_tariff = self.TARIFF_MAP[energy_tariff]
+
+        self.epc = {
+            "uprn": self.uprn,
+            "uprn-source": "Address Matched",
+            "property-type": property_type,
+            "building-reference-number": "",
+            **self.get_sap(),
+            **self.get_property_address(),
+            "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
+            # The age band lookup is keyed by country code first, then by the age band code
+            "construction-age-band": self.AGE_BAND_LOOKUP[
+                self.get_node_value('Country-Code')
+            ][self.get_node_value('Construction-Age-Band')],
+            "mainheat-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating')
+            ],
+            "windows-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Window', 'Environmental-Efficiency-Rating')
+            ],
+            "lighting-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Lighting', 'Energy-Efficiency-Rating')
+            ],
+            "environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'),
+            "mainheatcont-description":
+                self.get_property_summary_value('Main-Heating-Controls', 'Description'),
+            "sheating-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating')
+            ],
+            "local-authority": "",  # Not included in the xml
+            "local-authority-label": "",
+            "fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
+            "energy-tariff": energy_tariff,
+            "mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[self.get_node_value('Mechanical-Ventilation')],
+            "solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'),
+            "co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'),
+            "number-heated-rooms": self.get_node_value('Heated-Room-Count'),
+            "floor-description": self.get_property_summary_value('Floor', 'Description'),
+            "energy-consumption-potential": self.get_energy_assessment_value('Energy-Consumption-Potential'),
+            "built-form": self.BUILT_FORM_MAP[self.get_node_value('Built-Form')],
+            "number-open-fireplaces": self.get_node_value('Open-Fireplaces-Count'),
+            "windows-description": self.get_property_summary_value('Window', 'Description'),
+            "glazed-area": self.GLAZED_AREA_MAP[self.get_node_value('Glazed-Area')],
+            "inspection-date": self.get_node_value('Inspection-Date'),
+            "mains-gas-flag": self.get_node_value('Mains-Gas'),
+            "co2-emiss-curr-per-floor-area": self.get_energy_assessment_value('CO2-Emissions-Current-Per-Floor-Area'),
+            "heat-loss-corridor": heat_loss_corridor,
+            "unheated-corridor-length": unheated_corridor_length,
+            "flat-storey-count": flat_storey_count,
+            "roof-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Roof', 'Energy-Efficiency-Rating')
+            ],
+            "total-floor-area": self.get_node_value('Total-Floor-Area'),
+            "environment-impact-current": self.get_energy_assessment_value('Environmental-Impact-Current'),
+            "roof-description": self.get_property_summary_value('Roof', 'Description'),
+            "floor-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Floor', 'Energy-Efficiency-Rating')
+            ],
+            "number-habitable-rooms": self.get_node_value('Habitable-Room-Count'),
+            "hot-water-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Hot-Water', 'Environmental-Efficiency-Rating')
+            ],
+            "mainheatc-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Main-Heating-Controls', 'Energy-Efficiency-Rating')
+            ],
+            "main-fuel": self.FUEL_TYPE_MAP[self.get_node_value('Main-Fuel-Type')],
+            "lighting-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Lighting', 'Environmental-Efficiency-Rating')
+            ],
+            "windows-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Window', 'Energy-Efficiency-Rating')
+            ],
+            "floor-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Floor', 'Environmental-Efficiency-Rating')
+            ],
+            "sheating-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Secondary-Heating', 'Environmental-Efficiency-Rating')
+            ],
+            "lighting-description": self.get_property_summary_value('Lighting', 'Description'),
+            "roof-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Roof', 'Environmental-Efficiency-Rating')
+            ],
+            "walls-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Wall', 'Energy-Efficiency-Rating')
+            ],
+            "photo-supply": self.get_photo_supply(),
+            "lighting-cost-potential": self.get_energy_assessment_value('Lighting-Cost-Potential'),
+            "mainheat-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Main-Heating', 'Environmental-Efficiency-Rating')
+            ],
+            "multi-glaze-proportion": self.get_node_value('Multiple-Glazed-Proportion'),
+            # Same source value as "mainheatcont-description" above
+            "main-heating-controls": self.get_property_summary_value('Main-Heating-Controls', 'Description'),
+            "flat-top-storey": flat_top_storey,
+            "secondheat-description": self.get_property_summary_value('Secondary-Heating', 'Description'),
+            "walls-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Wall', 'Environmental-Efficiency-Rating')
+            ],
+            "transaction-type": self.TRANSACTION_TYPE_MAP[self.get_node_value('Transaction-Type')],
+            "extension-count": self.get_node_value('Extensions-Count'),
+            "mainheatc-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Main-Heating-Controls', 'Environmental-Efficiency-Rating')
+            ],
+            "lmk-key": "",  # Doesn't exist for non-EPC xmls
+            "wind-turbine-count": self.get_node_value('Wind-Turbines-Count'),
+            "tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
+            "floor-level": floor_level,
+            "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
+            "potential-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
+            "hot-water-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
+            ],
+            "low-energy-lighting": self.get_node_value('Low-Energy-Lighting'),
+            "walls-description": self.get_property_summary_value('Wall', 'Description'),
+            "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'),
+            "co2-emissions-current": self.get_node_value('CO2-Emissions-Current'),
+            "heating-cost-current": self.get_node_value('Heating-Cost-Current'),
+            "heating-cost-potential": self.get_energy_assessment_value('Heating-Cost-Potential'),
+            "hot-water-cost-current": self.get_node_value('Hot-Water-Cost-Current'),
+            "hot-water-cost-potential": self.get_energy_assessment_value('Hot-Water-Cost-Potential'),
+            "lighting-cost-current": self.get_node_value('Lighting-Cost-Current'),
+            "energy-consumption-current": self.get_node_value('Energy-Consumption-Current'),
+            # Both lodgement fields are filled from the inspection date
+            # NOTE(review): presumably because no lodgement date is available in these xmls - confirm
+            "lodgement-date": self.get_node_value('Inspection-Date'),
+            "lodgement-datetime":
+                datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(),
+            "mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
+            "floor-height": floor_height,
+            "glazed-type": glazed_type,
+        }
+
+    def get_insulation_wall_area(self):
+        """
+        Work out the wall area of the main dwelling that is available for insulation.
+
+        The area is the sum of heat loss perimeter x room height over the main dwelling's floors (rooms in the
+        roof are left out), minus the area of the windows whose location is "0".
+
+        Extensions are not included: there are no recommendations for them right now, so their insulation
+        wall area is not needed.
+
+        :return: wall area minus window area, as a float
+        """
+
+        floors = [
+            floor for floor in self.floor_dimensions
+            if floor["building_part_identifier"] == "Main Dwelling" and not floor["room_roof"]
+        ]
+        windows = [window for window in self.windows if window["window_location"] == "0"]
+
+        gross_wall_area = sum(
+            float(floor["heat_loss_perimeter"]) * float(floor["room_height"]) for floor in floors
+        )
+        glazing_area = sum(float(window["window_area"]) for window in windows)
+
+        return gross_wall_area - glazing_area
+
+    def extract_additional_data(self):
+        """
+        Assemble ``self.additional_data``: the survey details captured on top of the EPC fields.
+
+        Also sets ``self.insulation_wall_area``. Reads attributes set by the other extraction steps
+        (assessor, heating, doors, floor dimensions, windows), so those must have run beforehand.
+        """
+
+        self.insulation_wall_area = self.get_insulation_wall_area()
+
+        # The ground floor area of the main dwelling is used as the insulation floor area
+        ground_floors = [
+            f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling" and f["floor"] == "0"
+        ]
+        main_dwelling_ground_floor_area = ground_floors[0]["total_floor_area"]
+
+        main_windows = [w for w in self.windows if w["window_location"] == "0"]
+        windows_area = sum(float(w["window_area"]) for w in main_windows)
+
+        to_bool = {"true": True, "false": False, "Y": True, "N": False}
+        insulation_type_names = {None: "", "1": "Foam"}
+
+        # Both cylinder values are optional, a missing one is reported as None
+        thickness_text = self.get_node_value('Cylinder-Insulation-Thickness')
+        cylinder_insulation_thickness = int(thickness_text) if thickness_text else None
+
+        thermostat_text = self.get_node_value('Cylinder-Thermostat')
+        cylinder_thermostat = to_bool[thermostat_text] if thermostat_text else None
+
+        self.additional_data = {
+            "file_location": self.filekey,
+            "surveyor_name": self.surveyor_name,
+            "surveyor_company": self.surveyor_company,
+            "space_heating_kwh": self.space_heating_kwh,
+            "water_heating_kwh": self.water_heating_kwh,
+            # "heating_system": self.heating_system,
+            # "heating_controls": self.heating_controls,
+            "number_of_doors": self.number_of_doors,
+            "number_of_insulated_doors": self.number_of_insulated_doors,
+            "number_of_floors": self.number_of_floors,
+            "insulation_wall_area": self.insulation_wall_area,
+            "heat_loss_perimeter": self.heat_loss_perimeter,
+            "party_wall_length": self.party_wall_length,
+            "perimeter": self.perimeter,
+            "rooms_with_bath_and_or_shower": int(self.get_node_value('Rooms-With-Bath-And-Or-Shower')),
+            "rooms_with_mixer_shower_no_bath": int(self.get_node_value('Rooms-With-Mixer-Shower-No-Bath')),
+            "room_with_bath_and_mixer_shower": int(self.get_node_value('Rooms-With-Bath-And-Mixer-Shower')),
+            "percent_draftproofed": int(self.get_node_value('Percent-Draughtproofed')),
+            "has_hot_water_cylinder": to_bool[self.get_node_value('Has-Hot-Water-Cylinder')],
+            "cylinder_insulation_type": insulation_type_names[self.get_node_value('Cylinder-Insulation-Type')],
+            "cylinder_insulation_thickness": cylinder_insulation_thickness,
+            "cylinder_thermostat": cylinder_thermostat,
+            "main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
+            "number_of_windows": int(len(main_windows)),
+            "windows_area": float(windows_area),
+        }
+
+    def get_node_value(self, tag_name):
+        """
+        Return the text of the first ``tag_name`` tag found anywhere in the document.
+
+        :param tag_name: name of the tag to look for
+        :return: the tag's text, or None when the tag is absent or empty
+        """
+        matches = self.xml.getElementsByTagName(tag_name)
+        if not matches:
+            return None
+
+        first_child = matches[0].firstChild
+        return first_child.nodeValue if first_child else None
+
+    def get_node_value_from_floor_dimensions(self, tag_name):
+        """
+        Return the text of ``tag_name`` inside the first 'SAP-Floor-Dimension' tag of the document.
+
+        :param tag_name: name of the tag to look for within the floor dimension
+        :return: the tag's text, or None when the floor dimension or the tag is absent, or the tag is empty
+        """
+        floor_dimension_nodes = self.xml.getElementsByTagName('SAP-Floor-Dimension')
+        if not floor_dimension_nodes:
+            return None
+
+        matches = floor_dimension_nodes[0].getElementsByTagName(tag_name)
+        if matches and matches[0].firstChild:
+            return matches[0].firstChild.nodeValue
+        return None
+
+    def get_property_summary_value(self, section, tag_name):
+        """
+        Return the text of ``tag_name`` within the given section of the first 'Property-Summary' tag.
+
+        :param section: name of the section tag, e.g. 'Roof' or 'Main-Heating'
+        :param tag_name: name of the tag to read inside the first matching section
+        :return: the tag's text, or None when the section or the tag is absent, or the tag is empty
+        """
+        summary = self.xml.getElementsByTagName('Property-Summary')[0]
+        sections = summary.getElementsByTagName(section)
+        if not sections:
+            return None
+
+        matches = sections[0].getElementsByTagName(tag_name)
+        if matches and matches[0].firstChild:
+            return matches[0].firstChild.nodeValue
+        return None
+
+    def get_energy_assessment_value(self, tag_name):
+        """
+        Return the text of ``tag_name`` within the first 'Energy-Assessment' tag.
+
+        :param tag_name: name of the tag to read inside the energy assessment
+        :return: the tag's text, or None when the tag is absent or empty
+        """
+        assessment = self.xml.getElementsByTagName('Energy-Assessment')[0]
+        if not assessment:
+            return None
+
+        matches = assessment.getElementsByTagName(tag_name)
+        if matches and matches[0].firstChild:
+            return matches[0].firstChild.nodeValue
+        return None
+
+    def get_uprn(self, uprn):
+        """
+        Set ``self.uprn``, preferring an explicitly supplied value over the one in the XML.
+
+        When read from the first 'UPRN' tag of the XML, ``self.uprn`` becomes:
+        * -1 when the tag is empty
+        * 0 when the tag text consists solely of "0" characters
+        * otherwise the lower-cased text following the "uprn-" prefix, or the whole lower-cased text when
+          the prefix is absent
+
+        :param uprn: UPRN supplied by the caller; used unchanged when it is not None
+        """
+
+        if uprn is not None:
+            self.uprn = uprn
+            return
+
+        uprn_tag = self.xml.getElementsByTagName('UPRN')[0].firstChild
+        if uprn_tag is None:
+            self.uprn = -1
+            return
+
+        raw_uprn = uprn_tag.nodeValue
+        # If all of the characters in the UPRN are 0, then there is no UPRN set
+        if raw_uprn.count("0") == len(raw_uprn):
+            self.uprn = 0
+            return
+
+        # A value without the "uprn-" prefix used to raise IndexError here; it is now kept as it is
+        parts = raw_uprn.lower().split("uprn-")
+        self.uprn = parts[1] if len(parts) > 1 else parts[0]
+
+    def get_property_type(self):
+        """
+        Determine the property type from the 'Property-Type' tags, falling back to 'PropertyType1'.
+
+        :return: the PROPERTY_TYPE_LOOKUP entry for the code(s) found
+        :raises ValueError: when no xml has been read, or when several tags map to different property types
+        """
+        if not self.xml:
+            raise ValueError("You need to read the file first")
+
+        type_nodes = self.xml.getElementsByTagName('Property-Type') or self.xml.getElementsByTagName('PropertyType1')
+
+        if len(type_nodes) <= 1:
+            return PROPERTY_TYPE_LOOKUP[type_nodes[0].firstChild.nodeValue]
+
+        # Several tags are fine as long as they all agree on the property type
+        distinct_types = {PROPERTY_TYPE_LOOKUP[node.firstChild.nodeValue] for node in type_nodes}
+        if len(distinct_types) > 1:
+            raise ValueError("Multiple property types found")
+
+        return distinct_types.pop()
+
+    def get_sap(self):
+        """
+        Read the current SAP score from the first 'Energy-Rating-Current' tag and derive the EPC rating.
+
+        :return: dict with "current-energy-efficiency" (the score as a string) and "current-energy-rating"
+                 (the result of sap_to_epc for that score)
+        """
+        rating_nodes = self.xml.getElementsByTagName('Energy-Rating-Current')
+        score = int(rating_nodes[0].firstChild.nodeValue)
+
+        return {
+            "current-energy-efficiency": str(score),
+            "current-energy-rating": sap_to_epc(score),
+        }
+
+    def get_heating_and_emissions_data(self):
+        """
+        Extract the heating requirements from the xml:
+        1) Space heating requirement, stored in ``self.space_heating_kwh``
+        2) Water heating requirement, stored in ``self.water_heating_kwh``
+
+        Both are kept as the raw text of the first matching tag.
+
+        :return:
+        """
+
+        def first_text(tag_name):
+            return self.xml.getElementsByTagName(tag_name)[0].firstChild.nodeValue
+
+        self.space_heating_kwh = first_text('Space-Heating-Existing-Dwelling')
+        self.water_heating_kwh = first_text('Water-Heating')
+
+    def get_detailed_heating_specs(self):
+        """
+        Extract the details about the heating system from the data found in the <SAP-Heating> tag.
+
+        The heating code and the heating controls code of the first main heating entry are looked up in
+        ``heating_data``. When a code has no match, a placeholder naming that code is stored instead.
+
+        Sets ``self.heating_system`` and ``self.heating_controls``.
+        :return:
+        """
+        sap_main_heating_details = (
+            self.xml.getElementsByTagName('SAP-Heating')[0]
+            .getElementsByTagName("Main-Heating-Details")[0]
+            .getElementsByTagName("Main-Heating")[0]
+        )
+
+        heating_code = sap_main_heating_details.getElementsByTagName("Main-Heating-Number")[0].firstChild.nodeValue
+
+        # Get the heating system
+        heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
+        heating_system = heating_system.values[0] if not heating_system.empty else f"Heating code: {heating_code}"
+
+        # Get the heating controls
+        heating_controls_code = (
+            sap_main_heating_details.getElementsByTagName("Main-Heating-Control")[0].firstChild.nodeValue
+        )
+
+        heating_controls = heating_data[heating_data["code"] == int(heating_controls_code)]["description"]
+        # The placeholder must name the controls code (it used to show the heating system code by mistake)
+        heating_controls = (
+            heating_controls.values[0]
+            if not heating_controls.empty
+            else f"Heating Controls code: {heating_controls_code}"
+        )
+
+        self.heating_system = heating_system
+        self.heating_controls = heating_controls
+
+    def get_doors(self):
+        """
+        Read the door counts, which live in the first 'SAP-Property-Details' tag.
+
+        Sets ``self.number_of_doors`` and ``self.number_of_insulated_doors`` as integers.
+        """
+
+        def count_of(tag_name):
+            property_details = self.xml.getElementsByTagName('SAP-Property-Details')[0]
+            return int(property_details.getElementsByTagName(tag_name)[0].firstChild.nodeValue)
+
+        self.number_of_doors = count_of('Door-Count')
+        self.number_of_insulated_doors = count_of('Insulated-Door-Count')
+
+    def get_photo_supply(self):
+        """
+        Read the roof area percentage from the first 'Photovoltaic-Supply' tag.
+
+        Only the case where the supply holds a "None-Or-No-Details" tag is handled.
+
+        :return: text of the 'Percent-Roof-Area' tag inside "None-Or-No-Details"
+        :raises NotImplementedError: when the "None-Or-No-Details" tag is not present
+        """
+        pv_supply = self.xml.getElementsByTagName("Photovoltaic-Supply")[0]
+
+        # Check if the "None-Or-No-Details" tag is present
+        if not pv_supply.getElementsByTagName("None-Or-No-Details"):
+            raise NotImplementedError("Implement me")
+
+        no_details = pv_supply.getElementsByTagName("None-Or-No-Details")[0]
+        percent_roof_area = no_details.getElementsByTagName("Percent-Roof-Area")[0]
+        return percent_roof_area.firstChild.nodeValue
+
+    def get_assessor_details(self):
+        """
+        Read the assessor's name from the first 'Energy-Assessor' tag into ``self.surveyor_name``.
+        """
+
+        assessor = self.xml.getElementsByTagName('Energy-Assessor')[0]
+        name_node = assessor.getElementsByTagName("Name")[0]
+
+        self.surveyor_name = name_node.firstChild.nodeValue
+
+    def get_property_address(self):
+        """
+        Extract the address fields from the first 'Property' tag.
+
+        An address tag that is absent or empty yields None (county yields "" instead). The combined
+        "address" joins whichever of the three address lines are present.
+
+        :return: dict with the keys address1, address2, address3, posttown, postcode, address, county,
+                 constituency and constituency-label
+        """
+
+        property_tag = self.xml.getElementsByTagName("Property")[0]
+
+        def optional_text(tag_name):
+            # Not every address tag is guaranteed to be there, so never index the matches blindly
+            matches = property_tag.getElementsByTagName(tag_name)
+            return self.get_node(matches[0]) if matches else None
+
+        address1 = optional_text("Address-Line-1")
+        address2 = optional_text("Address-Line-2")
+        address3 = optional_text("Address-Line-3")
+        posttown = optional_text("Post-Town")
+        postcode = optional_text("Postcode")
+        address = ", ".join(
+            [x for x in [address1, address2, address3] if x is not None]
+        )
+
+        # An empty County tag is treated the same as a missing one
+        county = optional_text("County") or ""
+
+        # Seems to be unavailable in the xml
+        constituency = None
+        constituency_label = None
+
+        return {
+            "address1": address1,
+            "address2": address2,
+            "address3": address3,
+            "posttown": posttown,
+            "postcode": postcode,
+            "address": address,
+            "county": county,
+            "constituency": constituency,
+            "constituency-label": constituency_label
+        }
+
+    def get_floor_dimensions(self):
+
+        """
+        Extracts physical measurements of the property such as the floor area, room height, etc.
+        across the main dwelling and any extensions.
+
+        Sets:
+        * ``self.floor_dimensions``: one dict per floor (and per room in roof) of every building part, with the
+          values kept as the raw text from the xml
+        * ``self.number_of_floors``: number of those entries belonging to the "Main Dwelling"
+        * ``self.heat_loss_perimeter`` / ``self.party_wall_length``: the largest value per building part, summed
+          over the building parts
+        * ``self.perimeter``: heat loss perimeter plus party wall length
+        :return:
+        """
+
+        def get_part_value(node, tag_name):
+            element = node.getElementsByTagName(tag_name)
+            if element and element[0].firstChild:
+                return element[0].firstChild.nodeValue
+            return None
+
+        # Each part will correspond to the main dwelling or to an extension
+        sap_building_parts = self.xml.getElementsByTagName("SAP-Building-Part")
+
+        floor_dimensions = []
+        for building_part in sap_building_parts:
+            building_part_identifier = building_part.getElementsByTagName("Identifier")[0].firstChild.nodeValue
+            sap_floor_dimensions = building_part.getElementsByTagName("SAP-Floor-Dimension")
+
+            data = [
+                {
+                    'building_part_identifier': building_part_identifier,
+                    'floor': get_part_value(floor_dimension, 'Floor'),
+                    'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'),
+                    'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'),
+                    'heat_loss_perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
+                    'party_wall_length': get_part_value(floor_dimension, 'Party-Wall-Length'),
+                    'total_floor_area': get_part_value(floor_dimension, 'Total-Floor-Area'),
+                    'room_height': get_part_value(floor_dimension, 'Room-Height'),
+                    "room_roof": False
+                } for floor_dimension in sap_floor_dimensions
+            ]
+
+            # A room in roof is recorded as an extra floor, one above the highest floor of its building part
+            room_roofs = building_part.getElementsByTagName("SAP-Room-In-Roof")
+            room_roof_data = [
+                {
+                    "building_part_identifier": building_part_identifier,
+                    "floor": str(max([int(d["floor"]) for d in data]) + 1),
+                    "floor_construction": "",
+                    "floor_insulation": rr.getElementsByTagName("Insulation")[0].firstChild.nodeValue,
+                    "heat_loss_perimeter": "",
+                    "party_wall_length": "",
+                    "total_floor_area": rr.getElementsByTagName("Floor-Area")[0].firstChild.nodeValue,
+                    "room_height": "",
+                    "room_roof": True
+                } for rr in room_roofs
+            ]
+
+            floor_dimensions.extend(data)
+            floor_dimensions.extend(room_roof_data)
+
+        self.floor_dimensions = floor_dimensions
+
+        self.number_of_floors = len(
+            [f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"]
+        )
+
+        # Building parts in the order they first appear
+        part_identifiers = list(dict.fromkeys(d['building_part_identifier'] for d in floor_dimensions))
+
+        def summed_part_maximum(field):
+            # Largest value of `field` within each building part, summed over the parts. A part without any
+            # value contributes 0; the previous default of -inf turned the whole sum into -inf.
+            return sum(
+                max(
+                    (float(x[field]) for x in floor_dimensions
+                     if x['building_part_identifier'] == part and x[field]),
+                    default=0.0
+                ) for part in part_identifiers
+            )
+
+        self.heat_loss_perimeter = summed_part_maximum('heat_loss_perimeter')
+
+        self.party_wall_length = summed_part_maximum('party_wall_length')
+
+        self.perimeter = self.heat_loss_perimeter + self.party_wall_length
+
+    @staticmethod
+    def _parse_windows_content(window, glazing_type_lookup, orientation_lookup):
+        """
+        Turn a single window node into a flat dict.
+
+        :param window: DOM node of the window
+        :param glazing_type_lookup: maps the 'Glazing-Type' code to its description
+        :param orientation_lookup: maps the 'Orientation' code to its description
+        :return: dict with window_location, window_area, window_type, glazing_type, pvc_frame, glazing_gap
+                 and orientation; pvc_frame and glazing_gap are None when their tag is absent
+        """
+
+        def required(tag_name):
+            return window.getElementsByTagName(tag_name)[0].firstChild.nodeValue
+
+        def optional(tag_name):
+            matches = window.getElementsByTagName(tag_name)
+            return matches[0].firstChild.nodeValue if matches else None
+
+        # There may not be a pvc frame, nor a glazing gap (e.g. for single glazed windows)
+        pvc_frame = optional("PVC-Frame")
+        glazing_gap = optional("Glazing-Gap")
+
+        return {
+            "window_location": required("Window-Location"),
+            "window_area": required("Window-Area"),
+            "window_type": required("Window-Type"),
+            "glazing_type": glazing_type_lookup[required("Glazing-Type")],
+            "pvc_frame": pvc_frame,
+            "glazing_gap": glazing_gap,
+            "orientation": orientation_lookup[required("Orientation")],
+        }
+
+    def get_windows(self):
+        """
+        Extracts data about every 'SAP-Window' inside the first 'SAP-Windows' tag into ``self.windows``,
+        one dict per window, with the glazing type and orientation codes translated to text.
+        :return:
+        """
+
+        window_nodes = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
+
+        glazing_types = {
+            "3": "double glazing, unknown install date",
+            "5": "Single glazing",
+        }
+
+        # Orientation codes run clockwise from north
+        orientations = {
+            "1": "North",
+            "2": "North East",
+            "3": "East",
+            "4": "South East",
+            "5": "South",
+            "6": "South West",
+            "7": "West",
+            "8": "North West"
+        }
+
+        parsed_windows = []
+        for window_node in window_nodes:
+            parsed_windows.append(
+                self._parse_windows_content(
+                    window=window_node,
+                    glazing_type_lookup=glazing_types,
+                    orientation_lookup=orientations
+                )
+            )
+
+        self.windows = parsed_windows
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@ -1,3 +1,142 @@
+from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3
+from utils.logger import setup_logger
+from etl.xml_survey_extraction.XmlParser import XmlParser
+import os
+import pandas as pd
+from io import BytesIO
+
+logger = setup_logger()
+
+# S3 bucket that main() lists the energy assessments from
+# NOTE(review): hard-coded to the dev bucket
+BUCKET = "retrofit-energy-assessments-dev"
+# NOTE(review): hard-coded user id; presumably fills {USER_ID} in the path template below - confirm
+USER_ID = 8
+# Path template with {USER_ID} and {PORTFOLIO_ID} placeholders
+non_invasive_recommendations_filepath = "{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+# Scenario configuration, keyed by an integer id that matches the "portfolio_id" of the bodies below.
+# main() uses "surveyor" and "project_code" to build the S3 folder to scan.
+# NOTE(review): each entry of "bodies" looks like a request payload for one scenario - confirm with its consumer
+SCENARIOS = {
+    101: {
+        "project_code": "VEC001",
+        "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
+        "bodies": [
+            # Scenario A: quick wins (see "inclusions" and "scenario_name")
+            # NOTE(review): this comment used to say "Cavity wall insulation", which is not in the inclusions
+            {
+                "portfolio_id": str(101),
+                "housing_type": "Private",
+                "goal": "Increasing EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "inclusions": [
+                    "draught_proofing", "secondary_glazing", "trickle_vents", "low_energy_lighting",
+                ],
+                "budget": None,
+                "scenario_name": "Quick wins - do now while tenanted",
+                "multi_plan": True,
+            },
+            # Scenario B: the quick wins plus suspended floor and internal wall insulation
+            # NOTE(review): this comment used to say "CWI, Solar PV, AHSP", none of which are in the inclusions
+            {
+                "portfolio_id": str(101),
+                "housing_type": "Private",
+                "goal": "Increasing EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "inclusions": [
+                    "draught_proofing",
+                    "secondary_glazing",
+                    "trickle_vents",
+                    "low_energy_lighting",
+                    "suspended_floor_insulation",
+                    "internal_wall_insulation"
+                ],
+                "budget": None,
+                "scenario_name": "Do when void",
+                "multi_plan": True,
+            },
+        ]
+    },
+}
+
+# TODO: These non-intrusive recommendations should be detected from the EPRs, the scenarios and the condition report?
+#       For recommendations like trickle vents, we can deduce this from the condition report, depending on the
+#       ventilation of the room and the presence of trickle vents.
+# Hand-maintained list with one entry per property: its UPRN and the recommendations that apply to it.
+# NOTE(review): the constant's name misspells "INTRUSIVE"; renaming it needs every user updated as well
+NON_INTRUSITVE_RECOMMENDATIONS = [
+    {
+        # 2 Grove Mansions
+        "uprn": 121016121,
+        "recommendations": [
+            {
+                "type": "draught_proofing",
+                "cost": None,
+                "survey": True
+            },
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "suspended_floor_insulation", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 8 Grove Mansions
+        "uprn": 10024087855,
+        "recommendations": [
+            {"type": "draught_proofing", "cost": None, "survey": True},
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 9 Grove Mansions
+        "uprn": 121016128,
+        "recommendations": [
+            {"type": "draught_proofing", "cost": None, "survey": True},
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            # NOTE(review): the only recommendation without a "survey" key - confirm this is intended
+            {"type": "suspended_floor_insulation", "cost": None},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 5 Grove Mansions
+        "uprn": 121016124,
+        "recommendations": [
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 14 Grove Mansions
+        "uprn": 121016117,
+        "recommendations": [
+            {"type": "draught_proofing", "cost": None, "survey": True},
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 19 Grove Mansions
+        # NOTE(review): same UPRN as 14 Grove Mansions above - one of the two is almost certainly wrong
+        "uprn": 121016117,
+        "recommendations": [
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+            {"type": "room_roof_insulation", "cost": None, "survey": True},
+        ]
+    },
+]
+
+
 def main():
    """
    This function executes the main process, which will retrieve data from the specified locations, extract the data
@ -6,4 +145,124 @@ def main():
    """

    # TODO: Build solution to get this data from Onedrive and store what we need in S3
-    #       In s3, we have a bucket called retrofit-energy-assessments-{stage} which
+    #       In s3, we have a bucket called retrofit-energy-assessments-{stage} which contains the data we need
+    #       The data is stored in a folder called {surveyor}/{project_code}/{uprn}
+    #       We'll need to get the uprn from the folder name, which we can do with EpcSearcher class
+
+    # TODO: Pull out county, as in create_epc_records in the router, we pull it from the latest EPC, but we should
+    #       be able to deduce it from just the address. Same for constituency and constituency_label
+
+    # TODO: Store the project code in the database
+    #
+
+    for scenario_config in SCENARIOS.values():
+        energy_assessments = list_files_and_subfolders_in_s3_folder(
+            bucket_name=BUCKET, folder_name=f"{scenario_config['surveyor']}/{scenario_config['project_code']}/"
+        )
+
+        logger.info(
+            f"Found {len(energy_assessments)} energy assessments for {scenario_config['surveyor']} and "
+            f"{scenario_config['project_code']}"
+        )
+        assessments_map = {}
+        for assessment in energy_assessments:
+            uploaded_xmls = list_xmls_in_s3_folder(
+                bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans")
+            )
+            uprn = int(assessment.rstrip("/").split("/")[-1])
+            assessments_map[uprn] = uploaded_xmls
+
+        logger.info(f"Exatracted XMLS for the energy assessments")
+
+        # TODO: If we have many uploads, we can do them in batches so we don't try to upload huge amounts of data to
+        #       the database at once
+
+        # TODO: We now have detailed information about primary and secondary walls, so we should use this information
+        #       in our recommendations when we have it
+        #       For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions,
+        #       where
+        #       the physical dimensions and the fabric of each building is constructed in a way as if each building is
+        #       separate. We should use this information to make recommendations that are specific to each building
+        #       part, though the problem here is that while the fabric and dimensions are separate, the actual SAP,
+        #       CO2, etc
+        #       figures span across the entire property.
+        #       Idea: We can collect all of this information by building part and store it separately in the database
+        #             against the uprn. We can have key data for the EPC, but then also additional data for each
+        #             building
+        #             part. We can then use this data to make recommendations that are specific to each building part
+        #       We should probably re-think this data model, so we break up the data in a more considered fashion and
+        #       produce
+        #       the underlying EPC data as a summary of the building parts. Not only do we have data against the main
+        #       dwelling and extensions, but we also have multiple windows with individual pieces of information that
+        #       we can use to make recommendations. We should store this data in a way that we can easily access it and
+        #       use it to make recommendations (e.g. we should have a Windows table)
+
+        # For each property, we download the xmls and extract the data
+        database_data = []
+        for uprn, xmls in assessments_map.items():
+
+            extracted_data = {}
+            for xml in xmls:
+                xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
+                xml_data_io = BytesIO(xml_data)
+                xml_parser = XmlParser(
+                    file=xml_data_io,
+                    filekey=os.path.join(f"s3://{BUCKET}", xml),
+                    uprn=uprn,
+                    surveyor_company=scenario_config["surveyor"],
+                )
+                xml_parser.run()
+                if xml_parser.is_lig:
+                    logger.info(f"Extracted data from {xml}")
+                extracted_epc = xml_parser.epc
+                extracted_additional_data = xml_parser.additional_data
+
+                data_to_update = {
+                    **extracted_epc, **extracted_additional_data
+                }
+
+                # We need to update the keys to match the database schema - i.e. we should replace all hyphens with
+                # underscores
+                data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
+
+                extracted_data.update(data_to_update)
+
+            database_data.append(extracted_data)
+
+        logger.info("Uploading data to the database")
+        session = sessionmaker(bind=db_engine)()
+        bulk_insert_energy_assessments(session, database_data)
+        session.close()
+
+        # Create the asset list
+        asset_list = [
+            {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data
+        ]
+        asset_list = pd.DataFrame(asset_list)
+
+        # Store the asset list in s3
+        filename = f"{USER_ID}/{scenario_config['bodies'][0]['portfolio_id']}/non_intrusives.csv"
+        save_csv_to_s3(
+            dataframe=asset_list,
+            bucket_name="retrofit-plan-inputs-dev",
+            file_name=filename
+        )
+
+        for body in scenario_config["bodies"]:
+            body["trigger_file_path"] = filename
+            print(body)
+
+    # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which
+    #       can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat
+    #                          https://www.ncm-pcdb.org.uk/sap/download
+    #       However retrieving this data is not a priority, so we can leave this for now as parsing the database
+    #       is a non-trivial task
+
+    # TODO: The condition report contains additional data such as the number of bedrooms and the number of bathrooms
+    #       We can extract this data and store it in the database as well. We can then update our kwargs methodology
+    #       that is passed to the property class, where instead we store this additional data in our database (it could
+    #       be stored in the energy assessment table, or in a separate table) and then when we're passed additional data
+    #       we can query the database for this data and use it to update the property object, instead of storing it
+    #       in the asset list and pulling it out of the asset list
+    #       1) Bathrooms
+    #       2) Bedrooms
--- a/etl/xml_survey_extraction/pcdb.py
+++ b/etl/xml_survey_extraction/pcdb.py
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@ -176,7 +176,7 @@ module "retrofit_hotwater_kwh_predictions" {
 }

 module "retrofit_energy_assessments" {
-  source          = "./modules/s3"
+  source          = "./modules/s3_presignable_bucket"
  bucketname      = "retrofit-energy-assessments-${var.stage}"
  allowed_origins = var.allowed_origins
 }
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@ -64,6 +64,8 @@ SMART_APPLIANCE_THERMOSTAT_COST = 400
 PROGRAMMER_COST = 120
 ROOM_THERMOSTAT_COST = 150
 TRVS_COST = 35
+BYPASS_COST = 350  # Based on desktop research for a complex installation
+# https://www.checkatrade.com/blog/cost-guides/cost-install-water-shut-off-valve/

 # Cost for TTZC
 # Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
@ -98,8 +100,8 @@ CONDENSING_BOILER_COSTS = {
 # The unit is a 15kw boiler, capable of outputting between 3kw and 15kw. Costs seem to be around £1800
 ELECTRIC_BOILER_COSTS = 1800

-# Assumes 3 hours to remove each heater (including re-decorating)
-ROOM_HEATER_REMOVAL_COST = 120
+# Assumes 1 hour to remove each heater (including re-decorating)
+ROOM_HEATER_REMOVAL_COST = 50
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3

 # This is a cost quoted by Jim for a system flush - existig system will run more efficiently
@ -1012,7 +1014,7 @@ class Costs:
            "labour_days": labour_days
        }

-    def solar_pv(self, wattage: float, has_battery: bool = False):
+    def solar_pv(self, wattage: float, has_battery: bool = False, array_cost=None):

        """
        Calculates the total cost for solar PV based data provided by the MCS dashboard, which contains
@ -1026,13 +1028,17 @@ class Costs:
        https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/
        :param wattage: Peak wattage of the solar PV system]
        :param has_battery: Bool, whether the system includes a battery
+        :param array_cost: Optional float; when given, it is used as the array cost instead of the wattage estimate
        """

        # Get the cost data relevant to the region
        regional_cost = MCS_SOLAR_PV_COST_DATA["-".join(["average_cost_per_kwh", self.region])]

-        kw = wattage / 1000
-        total_cost = kw * regional_cost
+        if array_cost is not None:
+            total_cost = array_cost
+        else:
+            kw = wattage / 1000
+            total_cost = kw * regional_cost

        if has_battery:
            # The battery cost is based on the £3500 quote, recieved from installers
@ -1254,6 +1260,34 @@ class Costs:
            "labour_days": labour_days,
        }

+    def programmer_trvs_bypass(self, number_heated_rooms, has_programmer, has_trvs, has_bypass):
+
+        total_cost = 0
+        labour_hours = 0
+
+        if not has_programmer:
+            total_cost += PROGRAMMER_COST
+            labour_hours += 1
+
+        if not has_trvs:
+            total_cost += TRVS_COST * number_heated_rooms
+            labour_hours += 0.25 * number_heated_rooms
+
+        if not has_bypass:
+            total_cost += BYPASS_COST
+            labour_hours += 0.5
+
+        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+        vat = total_cost - subtotal_before_vat
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": 1,
+        }
+
    def heater_removal(self, n_rooms):
        """
        Estimates the costs of removal of heaters, including the redecoration costs of the space behind the heater
--- a/recommendations/DraughtProofingRecommendations.py
+++ b/recommendations/DraughtProofingRecommendations.py
@ -0,0 +1,56 @@
+from backend.Property import Property
+
+
+class DraughtProofingRecommendations:
+
+    def __init__(self, property_instance: Property):
+        self.property = property_instance
+
+        self.recommendation = []
+
+    def recommend(self):
+        """
+        In some cases, we can identify the need for draught proofing from the EPC recommendations, however the initial
+        implementation of this class will just assume that we are picking up a non-invasive recommendation from the
+        survey
+        """
+
+        # For the moment, draught proofing doesn't have a phase impact
+
+        draught_proofing_recommendation_config = next(
+            (r for r in self.property.non_invasive_recommendations if
+             r["type"] == "draught_proofing"),
+            {}
+        )
+
+        if not draught_proofing_recommendation_config:
+            return
+
+        description = (
+            "Draught proof doors and windows to improve energy efficiency" if
+            not draught_proofing_recommendation_config.get("description")
+            else draught_proofing_recommendation_config["description"]
+        )
+
+        # Build the single draught proofing recommendation using the values from the survey configuration
+        self.recommendation = [
+            {
+                "phase": None,
+                "parts": [],
+                "type": "draught_proofing",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "already_installed": False,
+                "sap_points": draught_proofing_recommendation_config["sap_points"],
+                "heat_demand": 0,
+                "kwh_savings": 0,
+                "co2_equivalent_savings": 0,
+                "energy_cost_savings": 0,
+                "total": draught_proofing_recommendation_config["cost"],
+                # Default to a rough estimate of 8 labour hours (one working day) if the survey gives none
+                "labour_hours": draught_proofing_recommendation_config.get("labour_hours", 8),
+                "labour_days": draught_proofing_recommendation_config.get("labour_days", 1),  # Assume 8 hour day
+                "survey": True
+            }
+        ]
--- a/recommendations/FireplaceRecommendations.py
+++ b/recommendations/FireplaceRecommendations.py
@ -50,5 +50,8 @@ class FireplaceRecommendations(Definitions):
                # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
                "labour_hours": 6 * number_open_fireplaces,
                "labour_days": 6 * number_open_fireplaces / 8,  # Assume 8 hour day
+                "description_simulation": {
+                    "number-open-fireplaces": 0
+                }
            }
        ]
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@ -5,12 +5,14 @@ import pandas as pd

 from BaseUtility import Definitions
 from datatypes.enums import QuantityUnits
+from backend.app.plan.schemas import MEASURE_MAP
 from backend.Property import Property
 from recommendations.recommendation_utils import (
    r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
-    get_recommended_part, get_floor_u_value, override_costs
+    get_recommended_part, get_floor_u_value, override_costs, check_simulation_difference
 )
 from recommendations.Costs import Costs
+from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes


 class FloorRecommendations(Definitions):
@ -62,18 +64,16 @@ class FloorRecommendations(Definitions):
            ]
        ]

-        self.exposed_floor_insulation_materials = [
-            part for part in materials if part["type"] == "exposed_floor_insulation"
-        ]
+    def recommend(self, phase=0, measures=None):

-        # TODO: To be completed
-        self.exposed_floor_non_insulation_materials = []
+        measures = MEASURE_MAP["floor_insulation"] if measures is None else measures
+
+        if not measures:
+            return

-    def recommend(self, phase=0):
        u_value = self.property.floor["thermal_transmittance"]
        property_type = self.property.data["property-type"]
        floor_area = self.property.insulation_floor_area
-        year_built = self.property.year_built

        if self.property.floor["another_property_below"] | (self.property.floor["insulation_thickness"] in [
            "average", "above average"
@ -94,14 +94,16 @@ class FloorRecommendations(Definitions):

        if u_value:

-            # By being built more recently than this, it means that the property was likely build with soild
-            # concrete floors with insulation already
-            if year_built < self.PART_L_YEAR_CUTOFF:
-                raise NotImplementedError("Not investigated this use case")
-
-            if u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
-                # The floor is already compliant
-                return
+            # In this case where we have the u-value of a floor, we likely don't have any other information about it
+            # so there is no recommendation that we can practically make
+            if (
+                self.property.floor["is_suspended"] or
+                self.property.floor["is_to_unheated_space"] or
+                self.property.floor["is_to_external_air"] or
+                self.property.floor["is_solid"]
+            ):
+                raise ValueError("This should not be possible")
+            return

        if u_value is None:
            u_value = get_floor_u_value(
@ -118,7 +120,11 @@ class FloorRecommendations(Definitions):
        if u_value < self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
            return

-        if self.property.floor["is_suspended"]:
+        if (
+            self.property.floor["is_suspended"] or
+            self.property.floor["is_to_unheated_space"] or
+            self.property.floor["is_to_external_air"]
+        ) and "suspended_floor_insulation" in measures:
            # Given the U-value, we recommend underfloor insulation
            self.recommend_floor_insulation(
                phase=phase,
@ -128,7 +134,7 @@ class FloorRecommendations(Definitions):
            )
            return

-        if self.property.floor["is_solid"]:
+        if self.property.floor["is_solid"] and "solid_floor_insulation" in measures:
            # Given the U-value, we recommend solid floor insulation options which are usually solid foam
            self.recommend_floor_insulation(
                u_value=u_value,
@ -138,10 +144,6 @@ class FloorRecommendations(Definitions):
            )
            return

-        if self.property.floor["is_to_unheated_space"] or self.property.floor["is_to_external_air"]:
-            self.recommend_floor_insulation(u_value=u_value, parts=self.exposed_floor_insulation_parts)
-            return
-
        raise NotImplementedError("Implement me!")

    @staticmethod
@ -197,6 +199,8 @@ class FloorRecommendations(Definitions):
                        if already_installed:
                            cost_result = override_costs(cost_result)

+                        new_description = "Suspended, insulated"
+
                    elif material["type"] == "solid_floor_insulation":
                        cost_result = self.costs.solid_floor_insulation(
                            insulation_floor_area=self.property.insulation_floor_area,
@ -207,9 +211,21 @@ class FloorRecommendations(Definitions):
                        already_installed = "solid_floor_insulation" in self.property.already_installed
                        if already_installed:
                            cost_result = override_costs(cost_result)
+
+                        new_description = "Solid, insulated"
                    else:
                        raise NotImplementedError("Implement me!")

+                    floor_ending_config = FloorAttributes(new_description).process()
+                    floor_simulation_config = check_simulation_difference(
+                        new_config=floor_ending_config, old_config=self.property.floor, prefix="floor_"
+                    )
+
+                    simulation_config = {
+                        **floor_simulation_config,
+                        "floor_thermal_transmittance_ending": new_u_value,
+                    }
+
                    self.recommendations.append(
                        {
                            "phase": phase,
@ -227,6 +243,7 @@ class FloorRecommendations(Definitions):
                            "new_u_value": new_u_value,
                            "sap_points": None,
                            "already_installed": already_installed,
+                            "simulation_config": simulation_config,
                            "description_simulation": {
                                "floor-description": "Solid, insulated" if
                                material["type"] == "solid_floor_insulation"
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@ -40,7 +40,10 @@ class HeatingControlRecommender:
            return

        if heating_description in ["Air source heat pump, radiators, electric"]:
+            # For an ASHP, we can recommend time and temperature zone controls, as well as programmer, trvs and a bypass
+            # which are common configurations for ASHPs
            self.recommend_time_temperature_zone_controls()
+            # self.recommend_programmer_trvs_bypass()

    def recommend_room_heaters_electric_controls(self):
        """
@ -279,3 +282,55 @@ class HeatingControlRecommender:
                "description_simulation": description_simulation
            }
        )
+
+    def recommend_programmer_trvs_bypass(self):
+
+        # We don't perform any checks here - this is likely to be used in conjunction with an ASHP recommendation
+        new_controls_description = "Programmer, TRVs and bypass"
+        ending_config = MainheatControlAttributes(new_controls_description).process()
+        simulation_config = check_simulation_difference(
+            new_config=ending_config, old_config=self.property.main_heating_controls
+        )
+        # Only adjust if the current system is below good
+        if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor"]:
+            simulation_config["mainheatc_energy_eff_ending"] = "Average"
+        else:
+            simulation_config["mainheatc_energy_eff_ending"] = self.property.data["mainheatc-energy-eff"]
+
+        description_simulation = {
+            "mainheatcont-description": new_controls_description,
+            "mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
+        }
+
+        has_programmer = self.property.main_heating_controls["switch_system"] == "programmer"
+        has_trvs = self.property.main_heating_controls["trvs"] is not None
+        has_bypass = self.property.main_heating_controls["auxiliary_systems"] == "bypass"
+
+        cost_result = self.costs.programmer_trvs_bypass(
+            number_heated_rooms=int(self.property.data["number-heated-rooms"]),
+            has_trvs=has_trvs,
+            has_programmer=has_programmer,
+            has_bypass=has_bypass
+        )
+
+        description = "Install a Bypass valve, TRVs and a Programmer"
+
+        already_installed = "heating_control" in self.property.already_installed
+        if already_installed:
+            cost_result = override_costs(cost_result)
+            description = "Heating controls have already been upgraded, no further action needed."
+
+        self.recommendation.append(
+            {
+                "type": "heating_control",
+                "parts": [],
+                "description": description,
+                **cost_result,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "already_installed": already_installed,
+                "simulation_config": simulation_config,
+                "description_simulation": description_simulation
+            }
+        )
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@ -1,6 +1,7 @@
 from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE
 from recommendations.recommendation_utils import check_simulation_difference, override_costs
 from backend.Property import Property
+from backend.app.plan.schemas import MEASURE_MAP
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
 from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
 from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
@ -28,7 +29,7 @@ class HeatingRecommender:
            self.property.main_heating["clean_description"] in self.ELECTRIC_HEATING_DESCRIPTIONS
        )

-    def is_high_heat_retention_valid(self):
+    def is_high_heat_retention_valid(self, ashp_only_heating_recommendation, measures):
        """
        Check conditions if high heat retention storage is valid
        :return:
@ -40,45 +41,30 @@ class HeatingRecommender:
            self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"]
        )

-        return self.has_electric_heating_description or electric_heating_assumed
+        has_electric = self.has_electric_heating_description or electric_heating_assumed

-    def recommend(self, has_cavity_or_loft_recommendations, phase=0, exclusions=None):
+        return (
+            has_electric and (not ashp_only_heating_recommendation) and
+            ("high_heat_retention_storage_heater" in measures)
+        )
+
+    def is_boiler_upgrade_suitable(self, measures, ashp_only_heating_recommendation):
        """
-        Produces heating recommendations
-        :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
-        recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
-        before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
-        :param phase: indicates the phase of the retrofit programme
-        :param exclusions: A list of exclusions for the recommendations
+        These are the conditions we apply to recommend a boiler installation
+        :return:
        """

-        # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
-        #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
-        #       in the Costs class, stored as SYSTEM_FLUSH_COST
-
-        exclusions = [] if exclusions is None else exclusions
-
-        self.heating_recommendations = []
-        self.heating_control_recommendations = []
-        # This first iteration of the recommender will provide very basic recommendation
-        # We recommend heating controls based on the main heating system
-
-        if self.is_high_heat_retention_valid():
-            # Recommend high heat retention storage heaters
-            # TODO: We need to allow for the possibility that the property aleady has storage heaters, but just
-            #       needs the controls
-            self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
-
-        # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
+        # 1) if the property has mains heating with boiler and radiators, we recommend optimal heating controls
        has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]

-        # We also check that the property doesn't have a heating system, but it has access to the mains gas
+        # 2) If the property doesn't have a heating system, but it has access to the mains gas
        no_heating_has_mains = self.property.main_heating["clean_description"] in [
            'No system present, electric heaters assumed'
        ] and self.property.data["mains-gas-flag"]

-        has_gas_heaters = (
-            self.property.main_heating["clean_description"] in ["Room heaters, mains gas"] and
+        # The property is using room heaters (gas or electric) and has access to gas mains
+        has_room_heaters = (
+            self.property.main_heating["clean_description"] in ["Room heaters, mains gas", "Room heaters, electric"] and
            self.property.data["mains-gas-flag"]
        )

@ -91,13 +77,68 @@ class HeatingRecommender:
            self.property.data["mains-gas-flag"]
        )

-        if (
-            has_boiler or
-            no_heating_has_mains or
-            electic_heating_has_mains or
-            has_gas_heaters or
-            portable_heaters_has_mains
-        ):
+        is_valid = (
+            (
+                has_boiler or
+                no_heating_has_mains or
+                electic_heating_has_mains or
+                has_room_heaters or
+                portable_heaters_has_mains
+            ) and
+            (not ashp_only_heating_recommendation) and
+            ("boiler_upgrade" in measures)
+        )
+
+        return is_valid, has_boiler
+
+    def recommend(self, has_cavity_or_loft_recommendations, phase=0, measures=None):
+        """
+        Produces heating recommendations
+
+        :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
+        recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
+        before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
+        :param phase: indicates the phase of the retrofit programme
+        :param measures: A list of measures for the recommendations
+        """
+
+        measures = MEASURE_MAP["heating"] if measures is None else measures
+
+        # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
+        #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
+        #       in the Costs class, stored as SYSTEM_FLUSH_COST
+
+        # TODO: Right now, we don't have recommendations for electric boilers - we should probably have one
+
+        # if we have a non-invasive ashp recommendation, we get the configuration directly from the property instance
+        non_invasive_ashp_recommendation = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"),
+            {"suitable": True}
+        )
+        # We allow for the non-invasive recommendation to be that ASHP is not suitable
+
+        # This option will prevent other heating recommendations from being specified, other than an ASHP
+        ashp_only_heating_recommendation = non_invasive_ashp_recommendation.get(
+            "ashp_only_heating_recommendation", False
+        )
+        self.heating_recommendations = []
+        self.heating_control_recommendations = []
+        # This first iteration of the recommender will provide very basic recommendation
+        # We recommend heating controls based on the main heating system
+
+        hhr_valid = self.is_high_heat_retention_valid(ashp_only_heating_recommendation, measures)
+
+        if hhr_valid:
+            # Recommend high heat retention storage heaters
+            # TODO: We need to allow for the possibility that the property already has storage heaters, but just
+            #       needs the controls
+            self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
+
+        gas_boiler_suitable, has_boiler = self.is_boiler_upgrade_suitable(
+            measures=measures, ashp_only_heating_recommendation=ashp_only_heating_recommendation
+        )
+
+        if gas_boiler_suitable:
            # This indicates that the home previously did not have a boiler in place and so would require
            # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
            system_change = not has_boiler
@ -116,9 +157,11 @@ class HeatingRecommender:
        # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
        # and either allow or prevent the recommendation of an air source heat pump

-        if self.is_ashp_valid(exclusions=exclusions):
+        if self.property.is_ashp_valid(measures=measures) and non_invasive_ashp_recommendation["suitable"]:
            self.recommend_air_source_heat_pump(
-                phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+                phase=phase,
+                has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
+
            )

        return
@ -186,19 +229,6 @@ class HeatingRecommender:
            description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi "
                           "boiler")

-    def is_ashp_valid(self, exclusions):
-
-        if "air_source_heat_pump" in self.property.non_invasive_recommendations:
-            return True
-
-        if "air_source_heat_pump" in exclusions:
-            return False
-
-        suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
-        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
-
-        return suitable_property_type and not has_air_source_heat_pump
-
    def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False):
        """
        This method will implement the recommendation for an air source heat pump
@ -207,117 +237,164 @@ class HeatingRecommender:
        :return:
        """

+        # Look for a non-intrusive recommendation
+        non_intrusive_recommendation = next((
+            r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"
+        ), {})
+
        controls_recommender = HeatingControlRecommender(self.property)
        controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")

        ashp_costs = self.costs.air_source_heat_pump()
-        # We add the costs of the heating controls, onto each key in the costs dictionary
-        if controls_recommender.recommendation:
-            for key in ashp_costs:
-                ashp_costs[key] += controls_recommender.recommendation[0][key]
+        if non_intrusive_recommendation:
+            # Update with non-intrusive recommendation
+            if non_intrusive_recommendation.get("cost"):
+                ashp_costs.update(
+                    {"total": non_intrusive_recommendation["cost"], "subtotal": None, "vat": None}
+                )

        already_installed = "air_source_heat_pump" in self.property.already_installed
+
+        controls_recommendations = controls_recommender.recommendation
+        if already_installed or not controls_recommendations:
+            # We set an empty object, so we just produce one recommendation
+            controls_recommendations = [None]
+
        if already_installed:
            ashp_costs = override_costs(ashp_costs)
-            description = "The property already has an air source heat pump, no further action needed."
-        else:
-            if controls_recommender.recommendation:
-                description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
-                               "room sensors and smart radiator valves (time & temperature zone control).")
-            else:
+
+        if non_intrusive_recommendation and not all([x is None for x in controls_recommendations]):
+            # We just use the ttzc control
+            controls_recommendations = [
+                x for x in controls_recommendations if (
+                    x["description_simulation"]["mainheatcont-description"] == "Time and temperature zone control"
+                )
+            ]
+
+        # This is a map from the heating controls description to the description of the air source heat pump set up
+        ashp_descriptions = {
+            "Time and temperature zone control": (
+                "Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
+                "room sensors and smart radiator valves (time & temperature zone control)."
+            ),
+            "Programmer, TRVs and bypass": (
+                "Install an air source heat pump, with programmer, TRVs and a Bypass valve."
+            ),
+        }
+
+        new_heating_description = "Air source heat pump, radiators, electric"
+        new_hot_water_description = "From main system"
+        ashp_recommendations = []
+        for controls_rec in controls_recommendations:
+
+            ashp_costs_with_controls = ashp_costs.copy()
+
+            if controls_rec:
+                for key in ashp_costs_with_controls:
+                    if ashp_costs_with_controls[key] is not None:
+                        ashp_costs_with_controls[key] += controls_rec[key]
+
+            # Pick the description. The already-installed check has to come first: in that case
+            # controls_recommendations is forced to [None] above, so testing controls_rec first would always
+            # describe a fresh install and never report the existing heat pump.
+            if already_installed:
+                description = "The property already has an air source heat pump, no further action needed."
+            elif controls_rec is None:
                description = "Install an air source heat pump."
+            else:
+                description = ashp_descriptions[controls_rec["description_simulation"]["mainheatcont-description"]]

            # If the property does not have existing cavity and loft insulation, we include a note that the cost
            # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
            # to the funding
-            if has_cavity_or_loft_recommendations:
-                description = description + (f" The cost includes the £"
-                                             f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
-                                             f"You must ensure that the property has an insulated cavity and "
-                                             f"270mm+ loft insulation to qualify for the grant")
-            else:
-                description = description + (f" The cost includes the £"
-                                             f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
+            # The grant note only makes sense for a new install that we have costed ourselves, so it is skipped
+            # for non-intrusive (surveyed) recommendations and for heat pumps that are already installed
+            if not non_intrusive_recommendation and not already_installed:
+                if has_cavity_or_loft_recommendations:
+                    description = description + (
+                        f" The cost includes the £"
+                        f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
+                        f"You must ensure that the property has an insulated cavity and "
+                        f"270mm+ loft insulation to qualify for the grant"
+                    )
+                else:
+                    description = description + (
+                        f" The cost includes the £{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant"
+                    )

-        new_heating_description = "Air source heat pump, radiators, electric"
-        new_hot_water_description = "From main system"
-        simulation_config = {
-            "mainheat_energy_eff_ending": "Good",
-            "hot_water_energy_eff_ending": "Good"
-        }
-        description_simulation = {
-            "mainheat-description": new_heating_description,
-            "mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
-            "hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
-            "hotwater-description": new_hot_water_description,
-        }
-        # Installation of a boiler improves the hot water system so we need to reflect this in
-        # the outcome of the recommendation
-        heating_ending_config = MainHeatAttributes(new_heating_description).process()
-        hotwater_ending_config = HotWaterAttributes(new_hot_water_description).process()
-
-        # If the property does not currently have electric main fuel, we'll simulate the change
-        fuel_ending_config = {}
-        if self.property.main_fuel["fuel_type"] != "electricity":
-            new_fuel_description = "electricity (not community)"
-            fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
-            description_simulation = {
-                **description_simulation,
-                "main-fuel": new_fuel_description
+            simulation_config = {
+                "mainheat_energy_eff_ending": "Good",
+                "hot_water_energy_eff_ending": "Good"
            }
+            description_simulation = {
+                "mainheat-description": new_heating_description,
+                "mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
+                "hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
+                "hotwater-description": new_hot_water_description,
+            }
+            # Installation of a boiler improves the hot water system so we need to reflect this in
+            # the outcome of the recommendation
+            heating_ending_config = MainHeatAttributes(new_heating_description).process()
+            hotwater_ending_config = HotWaterAttributes(new_hot_water_description).process()

-        # Check the simulation differences
-        heating_simulation_config = check_simulation_difference(
-            new_config=heating_ending_config, old_config=self.property.main_heating
-        )
-        hotwater_simulation_config = check_simulation_difference(
-            new_config=hotwater_ending_config, old_config=self.property.hotwater
-        )
-        fuel_simulation_config = check_simulation_difference(
-            new_config=fuel_ending_config, old_config=self.property.main_fuel
-        )
+            # If the property does not currently have electric main fuel, we'll simulate the change
+            fuel_ending_config = {}
+            if self.property.main_fuel["fuel_type"] != "electricity":
+                new_fuel_description = "electricity (not community)"
+                fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
+                description_simulation = {
+                    **description_simulation,
+                    "main-fuel": new_fuel_description
+                }

-        simulation_config = {
-            **simulation_config,
-            **heating_simulation_config,
-            **hotwater_simulation_config,
-            **fuel_simulation_config,
-        }
+            # Check the simulation differences
+            heating_simulation_config = check_simulation_difference(
+                new_config=heating_ending_config, old_config=self.property.main_heating
+            )
+            hotwater_simulation_config = check_simulation_difference(
+                new_config=hotwater_ending_config, old_config=self.property.hotwater
+            )
+            fuel_simulation_config = check_simulation_difference(
+                new_config=fuel_ending_config, old_config=self.property.main_fuel
+            )

-        if controls_recommender.recommendation:
-            # We should have just the single recommendation for heat controls, which is time
-            # and temperature zone controls
-            if len(controls_recommender.recommendation) != 1:
-                raise NotImplementedError("More than one heat controls recommendation for air source heat pump")
            simulation_config = {
                **simulation_config,
-                **controls_recommender.recommendation[0]["simulation_config"]
+                **heating_simulation_config,
+                **hotwater_simulation_config,
+                **fuel_simulation_config,
            }

-            description_simulation = {
-                **description_simulation,
-                **controls_recommender.recommendation[0]["description_simulation"]
+            if controls_rec is not None:
+                # Fold this controls option's simulation settings into the heat pump's, so the
+                # recommendation reflects both the new heating system and its controls
+                simulation_config = {
+                    **simulation_config,
+                    **controls_rec["simulation_config"]
+                }
+
+                description_simulation = {
+                    **description_simulation,
+                    **controls_rec["description_simulation"]
+                }
+
+            ashp_recommendation = {
+                "phase": phase,
+                "parts": [
+                    # TODO
+                ],
+                "type": "heating",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "already_installed": already_installed,
+                "simulation_config": simulation_config,
+                "description_simulation": description_simulation,
+                **ashp_costs_with_controls
            }

-        ashp_recommendation = {
-            "phase": phase,
-            "parts": [
-                # TODO
-            ],
-            "type": "heating",
-            "description": description,
-            "starting_u_value": None,
-            "new_u_value": None,
-            "sap_points": None,
-            "already_installed": already_installed,
-            "simulation_config": simulation_config,
-            "description_simulation": description_simulation,
-            **ashp_costs
-        }
+            ashp_recommendations.append(ashp_recommendation)

        if _return:
-            return [ashp_recommendation]
-        self.heating_recommendations.append(ashp_recommendation)
+            # ashp_recommendations is already a list (one entry per controls option); return it as-is so
+            # the result stays a flat list of recommendation dicts rather than a list nested in a list
+            return ashp_recommendations
+
+        self.heating_recommendations.extend(ashp_recommendations)

    @staticmethod
    def check_simulation_difference(old_config, new_config):
--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@ -66,6 +66,11 @@ class LightingRecommendations:
        if self.property.lighting["low_energy_proportion"] == 100:
            return

+        leds_recommendation_config = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "low_energy_lighting"),
+            {}
+        )
+
        number_lighting_outlets = self.property.number_lighting_outlets

        # Number non lel outlets
@ -79,6 +84,9 @@ class LightingRecommendations:
            return

        # Get the cost of the fittings
+        # A cost supplied on the non-invasive recommendation is not supported yet, so we fail loudly
+        # rather than continue with our own costing below
+        if leds_recommendation_config.get("cost"):
+            raise NotImplementedError(
+                "Costs from non-invasive recommendations for low energy lighting have not been implemented"
+            )
+
        cost_result = self.costs.low_energy_lighting(
            number_of_lights=number_non_lel_outlets,
            number_current_lel_lights=number_lighting_outlets - number_non_lel_outlets,
@ -97,6 +105,12 @@ class LightingRecommendations:
            cost_result = override_costs(cost_result)
            description = "Low energy lighting has already been installed, no further action required"

+        if leds_recommendation_config.get("sap_points") is not None:
+            # This could be zero points
+            sap_points = leds_recommendation_config["sap_points"]
+        else:
+            sap_points = round(2 * (number_non_lel_outlets / number_lighting_outlets), 2)
+
        self.recommendation = [
            {
                "phase": phase,
@ -108,13 +122,14 @@ class LightingRecommendations:
                "already_installed": already_installed,
                # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
                # the proportion of lights that will be set to low energy
-                "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),
+                "sap_points": sap_points,
                "kwh_savings": heat_demand_change,
                "co2_equivalent_savings": carbon_change,
                "description_simulation": {
                    "lighting-energy-eff": "Very Good",
                    "lighting-description": "Low energy lighting in all fixed outlets",
                },
-                **cost_result
+                **cost_result,
+                "survey": leds_recommendation_config.get("survey", False)
            }
        ]
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@ -1,13 +1,16 @@
 import math
 import pandas as pd
 from backend.Property import Property
+from backend.app.plan.schemas import MEASURE_MAP
 from typing import List
 from datatypes.enums import QuantityUnits
 from recommendations.recommendation_utils import (
    get_roof_u_value, r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns,
-    update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs
+    update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs,
+    check_simulation_difference
 )
 from recommendations.Costs import Costs
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes


 class RoofRecommendations:
@ -76,22 +79,43 @@ class RoofRecommendations:

        return self.recommendations

-    def is_loft_already_insulated(self):
+    def is_loft_already_insulated(self, measures):
        """
        Check if the loft is already insulated
        """

        # If we have a non-invasive recommendation for the loft insulation, we can assume that the loft is not insulated
-        if "loft_insulation" in self.property.non_invasive_recommendations:
+        # The override is now driven by the `measures` argument: whenever it contains "loft_insulation" the
+        # loft is reported as not insulated, regardless of the recorded insulation thickness
+        if "loft_insulation" in measures:
            return False

        return (self.insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]

-    def recommend(self, phase):
+    def is_room_roof_insulated(self):
+
+        """
+        Check if the room roof is already insulated.
+
+        The roof counts as insulated when its "insulation_thickness" attribute is "average" or
+        "above_average" and either "is_roof_room" is set, or both "is_pitched" and "is_at_rafters" are set.
+
+        :return: Truthy when either of the two conditions holds, falsy otherwise
+        """
+
+        # Case 1: a roof room whose insulation is rated average or better
+        full_insulated_room_roof = (
+            self.property.roof["is_roof_room"] and
+            self.property.roof["insulation_thickness"] in ["average", "above_average"]
+        )
+
+        # Case 2: a pitched roof insulated at the rafters, with the same insulation rating
+        room_roof_insulated_at_rafters = (
+            self.property.roof["is_pitched"] and
+            self.property.roof["is_at_rafters"] and
+            self.property.roof["insulation_thickness"] in ["average", "above_average"]
+        )
+
+        return full_insulated_room_roof or room_roof_insulated_at_rafters
+
+    def recommend(self, phase, measures=None):

        if self.property.roof["has_dwelling_above"]:
            return

+        measures = MEASURE_MAP["roof_insulation"] if measures is None else measures
+
        u_value = self.property.roof["thermal_transmittance"]

        # We check if the roof is already insulated and if so, we exit
@ -99,14 +123,14 @@ class RoofRecommendations:
        # Building regulations part L recommend installing at least 270mm of insulation, however generally we
        # experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation
        # This only holds true for pitched roofs.
-        if self.is_loft_already_insulated():
+        if self.is_loft_already_insulated(measures):
            return

        if (self.insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:
            return

-        if self.property.roof["is_roof_room"]:
-            raise ValueError("Update convert_thickness_to_numeric for room roof and implement")
+        if self.is_room_roof_insulated():
+            return

        # If we have a u-value already, need to implement this
        if u_value:
@ -118,23 +142,37 @@ class RoofRecommendations:
                return
            raise NotImplementedError("Implement me")

-        u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band})
+        u_value = get_roof_u_value(
+            insulation_thickness=self.property.roof["insulation_thickness"],
+            has_dwelling_above=self.property.roof["has_dwelling_above"],
+            is_loft=self.property.roof["is_loft"],
+            is_roof_room=self.property.roof["is_roof_room"],
+            is_thatched=self.property.roof["is_thatched"],
+            age_band=self.property.age_band,
+            is_flat=self.property.roof["is_flat"],
+            is_pitched=self.property.roof["is_pitched"],
+            is_at_rafters=self.property.roof["is_at_rafters"],
+        )

        self.estimated_u_value = u_value
-        if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and (
-            "loft_insulation" not in self.property.non_invasive_recommendations
+        # NOTE(review): with `or`, this guard also returns whenever "loft_insulation" is absent from
+        # `measures`, even if the roof is not compliant. The flat-roof and room-roof branches below are
+        # therefore only reachable when "loft_insulation" is also requested - confirm this is intended.
+        if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) or (
+            "loft_insulation" not in measures
        ):
            # The Roof is already compliant
            return

-        if self.property.roof["is_pitched"] or self.property.roof["is_flat"]:
-            insulation_thickness = (
-                0 if "loft_insulation" not in self.property.non_invasive_recommendations else self.insulation_thickness
-            )
+        if (self.property.roof["is_pitched"] and "loft_insulation" in measures) or (
+            self.property.roof["is_flat"] and "flat_roof_insulation" in measures
+        ):
+            insulation_thickness = 0 if "loft_insulation" not in measures else self.insulation_thickness
            self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
            return

-        if self.property.roof["is_roof_room"]:
+        # There are cases where the property might have a room roof as the second roof, but we have a recommendation for
+        # it, so we allow this override
+        if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures) or (
+            "room_roof_insulation" in [x["type"] for x in self.property.non_invasive_recommendations]
+        ):
            self.recommend_room_roof_insulation(u_value, phase)
            return

@ -245,6 +283,40 @@ class RoofRecommendations:
                        if already_installed:
                            cost_result = override_costs(cost_result)
                        new_thickness = insulation_thickness + material["depth"]
+
+                        # This is based on the values we have in the training data
+                        valid_numeric_values = [
+                            12,
+                            25,
+                            50,
+                            75,
+                            100,
+                            150,
+                            200,
+                            250,
+                            270,
+                            300,
+                            350,
+                            400,
+                        ]
+
+                        proposed_depth = new_thickness
+                        if (new_thickness not in valid_numeric_values) and material["type"] == "loft_insulation":
+                            # Take the nearest value for scoring
+                            proposed_depth = min(
+                                valid_numeric_values, key=lambda x: abs(x - proposed_depth)
+                            )
+
+                        if proposed_depth >= 270:
+                            new_efficiency = "Very Good"
+                        else:
+                            if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
+                                new_efficiency = "Good"
+                            else:
+                                new_efficiency = "Very Good"
+
+                        new_description = f"Pitched, {int(proposed_depth)}mm loft insulation"
+
                    elif material["type"] == "flat_roof_insulation":
                        cost_result = self.costs.flat_roof_insulation(
                            floor_area=self.property.insulation_floor_area,
@ -254,38 +326,21 @@ class RoofRecommendations:
                        already_installed = "flat_roof_insulation" in self.property.already_installed
                        if already_installed:
                            cost_result = override_costs(cost_result)
-                        new_thickness = None
+                        new_description = "Flat, insulated"
+                        new_efficiency = "Good"
                    else:
                        raise ValueError("Invalid material type")

-                    # This is based on the values we have in the training data
-                    valid_numeric_values = [
-                        12,
-                        25,
-                        50,
-                        75,
-                        100,
-                        150,
-                        200,
-                        250,
-                        270,
-                        300,
-                        350,
-                        400,
-                    ]
+                    roof_ending_config = RoofAttributes(new_description).process()
+                    roof_simulation_config = check_simulation_difference(
+                        new_config=roof_ending_config, old_config=self.property.roof, prefix="roof_"
+                    )

-                    proposed_depth = new_thickness
-                    if new_thickness not in valid_numeric_values:
-                        # Take the nearest value for scoring
-                        proposed_depth = min(
-                            valid_numeric_values, key=lambda x: abs(x - proposed_depth)
-                        )
-
-                    if proposed_depth >= 270:
-                        new_efficiency = "Very Good"
-                    else:
-                        if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
-                            new_efficiency = "Good"
+                    simulation_config = {
+                        **roof_simulation_config,
+                        "roof_thermal_transmittance_ending": new_u_value,
+                        "roof_energy_eff_ending": new_efficiency
+                    }

                    recommendations.append(
                        {
@ -304,9 +359,9 @@ class RoofRecommendations:
                            "new_u_value": new_u_value,
                            "sap_points": None,
                            "already_installed": already_installed,
-                            "new_thickness": new_thickness,
+                            "simulation_config": simulation_config,
                            "description_simulation": {
-                                "roof-description": f"Pitched, {int(proposed_depth)}mm loft insulation",
+                                "roof-description": new_description,
                                "roof-energy-eff": new_efficiency
                            },
                            **cost_result
@ -357,18 +412,27 @@ class RoofRecommendations:
        :return:
        """

-        roof_roof_insulation_materials = [m for m in self.materials if m["type"] == "room_roof_insulation"]
-        if not roof_roof_insulation_materials:
-            raise ValueError("No room in roof insulation materials found")
+        # TODO: We temporarily use costs from SCIS for RIR insulation. The costing was £180 per m2 of floor
+        roof_roof_insulation_materials = [
+            {
+                "type": "room_roof_insulation",
+                "description": "Insulating the ceiling of the roof roof and re-decorate",
+                "depths": [100],
+                "depth_unit": "mm",
+                "r_value_per_mm": 0.038,
+                "thermal_conductivity": 0.022,
+                "cost": [180],
+            }
+        ]

-        if self.property.pitched_roof_area is None:
-            raise ValueError("pitched_roof_area not included as property attribute")
+        rir_non_invasive_recommendation = next(
+            (x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {}
+        )

-        lowest_selected_u_value = None
+        # lowest_selected_u_value = None
        recommendations = []
        for material in roof_roof_insulation_materials:
            for depth, cost_per_unit in zip(material["depths"], material["cost"]):
-
                part_u_value = r_value_per_mm_to_u_value(depth, material["r_value_per_mm"])

                _, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
@ -380,36 +444,69 @@ class RoofRecommendations:
                # If I have a lowest U value and my new u value is lower than the lowest value, it's
                # further into the diminishing returns threshold and can shouldn't be

-                if is_diminishing_returns(
-                    recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
-                ):
-                    continue
+                # if is_diminishing_returns(
+                #     recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
+                # ):
+                #     continue

                # We allow a small tolerance for error so we don't discount the recommendation entirely
-                if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
-                    lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
+                # if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
+                # lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)

-                    estimated_cost = cost_per_unit * self.property.pitched_roof_area
+                estimated_cost = (
+                    cost_per_unit * self.property.insulation_floor_area if
+                    rir_non_invasive_recommendation.get("cost") is None else
+                    rir_non_invasive_recommendation.get("cost")
+                )

-                    recommendations.append(
-                        {
-                            "phase": phase,
-                            "parts": [
-                                get_recommended_part(
-                                    part=material,
-                                    selected_depth=depth,
-                                    quantity=self.property.pitched_roof_area,
-                                    quantity_unit=QuantityUnits.m2.value,
-                                    selected_total_cost=estimated_cost
-                                )
-                            ],
-                            "type": "room_roof_insulation",
-                            "description": self.make_room_roof_insulation_description(material, depth),
-                            "starting_u_value": u_value,
-                            "new_u_value": new_u_value,
-                            "sap_points": None,
-                            "cost": estimated_cost,
-                        }
-                    )
+                sap_points = rir_non_invasive_recommendation.get("sap_points", None)
+
+                # Could also be Roof room(s), ceiling insulated
+                new_descriptin = "Pitched, insulated at rafters"
+                roof_ending_config = RoofAttributes(new_descriptin).process()
+                roof_simulation_config = check_simulation_difference(
+                    new_config=roof_ending_config, old_config=self.property.roof, prefix="roof_"
+                )
+                if self.property.data["roof-energy-eff"] in ["Very Poor", "Poor"]:
+                    new_efficiency = "Average"
+                else:
+                    new_efficiency = self.property.data["roof-energy-eff"]
+
+                simulation_config = {
+                    **roof_simulation_config,
+                    "roof_thermal_transmittance_ending": new_u_value,
+                    "roof_energy_eff_ending": new_efficiency
+                }
+
+                # Check the measure this recommendation is actually for ("room_roof_insulation", matching the
+                # "type" emitted below) rather than the flat roof measure
+                already_installed = "room_roof_insulation" in self.property.already_installed
+                cost_result = {
+                    "total": estimated_cost,
+                    "labour_hours": 80,
+                    "labour_days": 5,
+                }
+                if already_installed:
+                    cost_result = override_costs(cost_result)
+
+                recommendations.append(
+                    {
+                        "phase": phase,
+                        "parts": [
+                            # TODO
+                        ],
+                        "type": "room_roof_insulation",
+                        "description": "Insulate room in roof at rafters and re-decorate",
+                        "starting_u_value": u_value,
+                        "new_u_value": None,
+                        "sap_points": sap_points,
+                        "simulation_config": simulation_config,
+                        "description_simulation": {
+                            "roof-description": new_descriptin,
+                            "roof-energy-eff": new_efficiency
+                        },
+                        **cost_result,
+                        "already_installed": already_installed,
+                        "survey": rir_non_invasive_recommendation.get("survey", None)
+                    }
+                )

        self.recommendations = recommendations
--- a/recommendations/SecondaryHeating.py
+++ b/recommendations/SecondaryHeating.py
@ -60,6 +60,9 @@ class SecondaryHeating:
                **costs,
                "simulation_config": {
                    "secondheat_description_ending": "None"
+                },
+                "description_simulation": {
+                    "secondheat-description": "None"
                }
            }
        )
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@ -1,6 +1,8 @@
 import numpy as np
+import pandas as pd
+
 from recommendations.Costs import Costs
-from recommendations.recommendation_utils import override_costs
+from recommendations.recommendation_utils import override_costs, esimtate_pitched_roof_area


 class SolarPvRecommendations:
@ -78,23 +80,6 @@ class SolarPvRecommendations:
            }
        ]

-    def is_solar_pv_valid(self):
-
-        # If the property is a flat but we are looking at building solar potential, we can include this
-        if (self.property.building_id is not None) and (self.property.solar_panel_configuration is not None):
-            return True
-
-        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
-        is_valid_roof_type = (
-            self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
-        )
-        # If there is no existing solar PV, the photo-supply field will be None or a missing value
-        has_no_existing_solar_pv = self.property.data["photo-supply"] in [
-            None, 0, self.property.DATA_ANOMALY_MATCHES
-        ]
-
-        return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv
-
    def recommend_building_analysis(self, phase):
        """
        This recommendation approach handles the case of producing solar PV recommendations at the building level,
@ -114,10 +99,14 @@ class SolarPvRecommendations:
        best_configurations = panel_performance.head(1).reset_index(drop=True)

        for rank, recommendation_config in best_configurations.iterrows():
-            roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
+            # If panneled_roof_area is not in the recommendation_config we should calculate it (TODO)
+            if recommendation_config.get("panneled_roof_area", None):
+                roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
+            else:
+                raise Exception("IMPLEMENT ME")
            # Spread the cost to the individual units - adding a 20% contingency
            total_cost = recommendation_config["total_cost"] / n_units
-            kw = np.floor(recommendation_config["array_warrage"] / 100) / 10
+            kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
            # Default to a weeks work for a team of 3 people doing 8 hour days
            labour_days = 5
            labour_hours = 3 * 8 * labour_days
@ -159,7 +148,7 @@ class SolarPvRecommendations:
        :return:
        """

-        if not self.is_solar_pv_valid():
+        if not self.property.is_solar_pv_valid():
            return

        # If we have a buiilding level analysis, we implement separate logic
@ -167,84 +156,77 @@ class SolarPvRecommendations:
            self.recommend_building_analysis(phase)
            return

-        solar_pv_percentage = self.property.solar_pv_percentage
-        # We round up to the neaest 10%
-        solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10
+        non_invasive_recommendation = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "solar_pv"), {"suitable": True}
+        )

-        # For the solar recommendations, we produce the following scenarios:
-        # 1) Solar panels only, we present a high, medium and low coverage
-        # 2) With and without battery
-        roof_coverage_scenarios = [
-            solar_pv_percentage - 0.1, solar_pv_percentage,
-        ]
-        if solar_pv_percentage <= 0.4:
-            roof_coverage_scenarios.append(solar_pv_percentage + 0.1)
-        # We make sure we haven't gone too low or high - we allow no more than 60% coverage
-        roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
-        # If we only have two scenarios, we add a coverage scenario 10% less than the smallest
-        if len(roof_coverage_scenarios) == 2:
-            roof_coverage_scenarios.insert(0, roof_coverage_scenarios[0] - 0.1)
-        battery_scenarios = [False, True]
+        # We allow for the non-invasive recommendation to be that solar PV is not suitable
+        if not non_invasive_recommendation["suitable"]:
+            return

-        scenarios_with_wattage = []
-        for roof_coverage in roof_coverage_scenarios:
-            # We now have a property which is potentially suitable for solar PV
-            solar_pv_roof_area = self.property.get_solar_pv_roof_area(roof_coverage)
+        if non_invasive_recommendation.get("array_wattage") is not None:

-            number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
-            solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
-
-            if solar_panel_wattage < self.MIN_SYSTEM_WATTAGE:
-                continue
-
-            solar_panel_wattage = np.clip(
-                a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
-            )
-            scenarios_with_wattage.append((roof_coverage, solar_panel_wattage))
-
-        # We trim the scenarios, so that we don't have duplicate wattages
-        scenarios_with_wattage = self.trim_solar_wattage_options(scenarios_with_wattage)
-
-        # Produce the cross product of the scenarios
-        scenarios = [
-            (roof, wattage, battery) for roof, wattage in scenarios_with_wattage for battery in battery_scenarios
-        ]
-        # We deduce the wattage of the solar panels based on the roof coverage
-
-        for roof_coverage, solar_panel_wattage, has_battery in scenarios:
-            # We now have a property which is potentially suitable for solar PV
-            roof_coverage_percent = round(roof_coverage * 100)
-            # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
-            # of solar PV installations
-            cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=has_battery)
-            kw = np.floor(solar_panel_wattage / 100) / 10
-
-            if has_battery:
-                description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on "
-                               f"{round(roof_coverage_percent)}% the roof, with a battery storage system.")
+            if self.property.roof["is_flat"]:
+                roof_area = self.property.insulation_floor_area
            else:
-                description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
-                               f"anel system on {round(roof_coverage_percent)}% the roof.")
-
-            already_installed = "solar_pv" in self.property.already_installed
-            if already_installed:
-                cost_result = override_costs(cost_result)
-
-            self.recommendation.append(
-                {
-                    "phase": phase,
-                    "parts": [],
-                    "type": "solar_pv",
-                    "description": description,
-                    "starting_u_value": None,
-                    "new_u_value": None,
-                    "sap_points": None,
-                    "already_installed": already_installed,
-                    **cost_result,
-                    # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
-                    # back up here
-                    "photo_supply": 100 * roof_coverage,
-                    "has_battery": has_battery,
-                    "description_simulation": {"photo-supply": 100 * roof_coverage},
-                }
+                roof_area = esimtate_pitched_roof_area(
+                    floor_area=self.property.insulation_floor_area, floor_height=self.property.data["floor-height"]
+                )
+            solar_configurations = pd.DataFrame(
+                [
+                    {
+                        "array_wattage": non_invasive_recommendation["array_wattage"],
+                        "initial_ac_kwh_per_year": non_invasive_recommendation["initial_ac_kwh_per_year"],
+                        "panneled_roof_area": non_invasive_recommendation["panneled_roof_area"]
+                    }
+                ]
            )
+        else:
+            # TODO: There may be some instances where we don't want to use the solar API so we should cover for them
+            panel_performance = self.property.solar_panel_configuration["panel_performance"]
+            roof_area = self.property.roof_area
+            solar_configurations = panel_performance.head(3).reset_index(drop=True)
+
+        # We combine each of these configurations with estimates with and without a battery
+        for rank, recommendation_config in solar_configurations.iterrows():
+            roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100)
+            # We round up to the nearest 10
+            roof_coverage_percent = np.ceil(roof_coverage_percent / 10) * 10
+            for has_battery in [False, True]:
+                cost_result = self.costs.solar_pv(
+                    wattage=recommendation_config["array_wattage"],
+                    has_battery=has_battery,
+                    array_cost=non_invasive_recommendation.get("cost", None)
+                )
+                kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
+                if has_battery:
+                    description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on "
+                                   f"{round(roof_coverage_percent)}% the roof, with a battery storage system.")
+                else:
+                    description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
+                                   f"anel system on {round(roof_coverage_percent)}% the roof.")
+
+                already_installed = "solar_pv" in self.property.already_installed
+                if already_installed:
+                    cost_result = override_costs(cost_result)
+
+                self.recommendation.append(
+                    {
+                        "phase": phase,
+                        "parts": [],
+                        "type": "solar_pv",
+                        "description": description,
+                        "starting_u_value": None,
+                        "new_u_value": None,
+                        "sap_points": None,
+                        "already_installed": already_installed,
+                        **cost_result,
+                        # This is required for simulating the SAP impact. roof_coverage_percent is already a
+                        # percentage (rounded up to the nearest 10 above), so unlike the old 0-1
+                        # solar_pv_percentage it needs no scaling here
+                        "photo_supply": roof_coverage_percent,
+                        "has_battery": has_battery,
+                        "initial_ac_kwh_per_year": recommendation_config["initial_ac_kwh_per_year"],
+                        "description_simulation": {"photo-supply": roof_coverage_percent},
+                    }
+                )
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@ -81,3 +81,44 @@ class VentilationRecommendations(Definitions):
                "labour_days": labour_days  # Assume 8 hour day
            }
        ]
+
+    def recommend_trickle_vents(self):
+        """
+        Trickle vents cannot be identified completely non-invasively, but an energy assessment may recommend them.
+        Builds that recommendation from the "trickle_vents" non-invasive entry; returns None when there is none.
+        """
+
+        trickle_vents_recommendation_config = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "trickle_vents"), {}
+        )
+
+        if not trickle_vents_recommendation_config:
+            return  # No trickle vents entry supplied, so nothing to recommend
+
+        description = (
+            "Install trickle vents on your windows" if
+            not trickle_vents_recommendation_config.get("description")
+            else trickle_vents_recommendation_config["description"]
+        )
+
+        return [
+            {
+                "phase": None,
+                "parts": [],
+                "type": "trickle_vents",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "already_installed": False,
+                "sap_points": 0,
+                "heat_demand": 0,
+                "kwh_savings": 0,
+                "co2_equivalent_savings": 0,
+                "energy_cost_savings": 0,
+                "total": trickle_vents_recommendation_config["cost"],  # The entry must supply "cost"
+                # Labour defaults to a rough estimate of one 8 hour day when the entry gives no figures
+                "labour_hours": trickle_vents_recommendation_config.get("labour_hours", 8),
+                "labour_days": trickle_vents_recommendation_config.get("labour_days", 1),  # Assume 8 hour day
+                "survey": True
+            }
+        ]
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@ -5,6 +5,7 @@ import pandas as pd

 from datatypes.enums import QuantityUnits
 from backend.Property import Property
+from backend.app.plan.schemas import MEASURE_MAP
 from BaseUtility import Definitions
 from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
 from recommendations.recommendation_utils import (
@ -13,6 +14,7 @@ from recommendations.recommendation_utils import (
 )
 from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION
 from recommendations.Costs import Costs
+from recommendations.wall_energy_efficiency_values import cavity_wall_energy_eff, iwi_energy_eff, ewi_energy_eff
 from utils.logger import setup_logger

 logger = setup_logger()
@ -60,10 +62,13 @@ class WallRecommendations(Definitions):
        "Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
        "Solid brick, as built, no insulation": "Solid brick, with external insulation",
        "Solid brick, as built, insulated": "Solid brick, with external insulation",
+        "Solid brick, as built, partial insulation": "Solid brick, with external insulation",
        "Cob, as built": "Cob, with external insulation",
        "System built, as built, no insulation": "System built, with external insulation",
        "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
        "Timber frame, as built, no insulation": "Timber frame, with external insulation",
+        'Timber frame, as built, partial insulation': 'Timber frame, with external insulation',
+        "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with external insulation",
    }

    # These are the ending descriptions we consider for walls with internal insulation
@ -71,10 +76,13 @@ class WallRecommendations(Definitions):
        "Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
        "Solid brick, as built, no insulation": "Solid brick, with internal insulation",
        "Solid brick, as built, insulated": "Solid brick, with internal insulation",
+        "Solid brick, as built, partial insulation": "Solid brick, with internal insulation",
        "Cob, as built": "Cob, with internal insulation",
        "System built, as built, no insulation": "System built, with internal insulation",
        "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
        "Timber frame, as built, no insulation": "Timber frame, with internal insulation",
+        'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation',
+        "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with internal insulation",
    }

    def __init__(
@ -155,7 +163,7 @@ class WallRecommendations(Definitions):
        )

        # Test filling cavity
-        self.find_cavity_insulation(u_value, insulation_thickness, phase)
+        self.find_cavity_insulation(u_value, insulation_thickness, phase, measures)

        return self.recommendations

@ -183,11 +191,15 @@ class WallRecommendations(Definitions):

        return ewi_recommendations

-    def recommend(self, phase=0):
+    def recommend(self, phase=0, measures=None):
        # if building built after 1990 + we're able to identify U-value +
        # U-value less than 0.18 and if in or close to a conversation area,
        # recommend internal wall insulation as a possible measure

+        measures = MEASURE_MAP["wall_insulation"] if measures is None else measures
+        if not measures:
+            return
+
        u_value = self.property.walls["thermal_transmittance"]
        u_value = None if pd.isnull(u_value) else u_value

@ -200,7 +212,7 @@ class WallRecommendations(Definitions):
            or self.property.walls["is_filled_cavity"]
        ) and (
            "cavity_extract_and_refill"
-            not in self.property.non_invasive_recommendations
+            not in measures
        ):
            return

@ -228,15 +240,15 @@ class WallRecommendations(Definitions):
                and (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
            ):
                # Recommend insulation
-                self.find_insulation(u_value, phase)
+                self.find_insulation(u_value, phase, measures)
                return

            # We can't detect it's a cavity wall, but it was built after 1990 so likely built with insulation already
            # + it already has a U-value better than the building regulations, so we don't need to recommend anything
            if (
                (not is_cavity_wall)
-                and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
-                and (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
+                and ((self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
+                     or (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE))
            ):
                # Recommend nothing
                return
@ -252,22 +264,22 @@ class WallRecommendations(Definitions):

        self.estimated_u_value = u_value

-        if is_cavity_wall or "cavity_extract_and_refill" in self.property.non_invasive_recommendations:
+        if (is_cavity_wall and "cavity_wall_insulation" in measures) or "cavity_extract_and_refill" in measures:
            if u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
                # Test filling cavity
-                self.find_cavity_insulation(u_value, insulation_thickness, phase)
+                self.find_cavity_insulation(u_value, insulation_thickness, phase, measures)

            return

        # Remaining wall types are treated with IWI or EWI
        if (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and self.is_suitable_for_solid_insulation():
-            self.find_insulation(u_value, phase)
+            self.find_insulation(u_value, phase, measures=measures)
            return

        # If the u-value is within regulations, we don't do anything
        return

-    def find_cavity_insulation(self, u_value, insulation_thickness, phase):
+    def find_cavity_insulation(self, u_value, insulation_thickness, phase, measures):
        """
        This method tests different materials to fill the cavity wall, determining which
        material will give us the best U-value.
@ -287,6 +299,8 @@ class WallRecommendations(Definitions):
        :param u_value: u_value of the starting wall
        :param insulation_thickness: describes the insulation level of the wall. If "below average", we have a partially
        filled cavity wall
+        :param phase: The phase of the recommendation
+        :param measures: The measures we're considering
        """

        insulation_materials = pd.DataFrame(self.cavity_wall_insulation_materials)
@ -321,7 +335,7 @@ class WallRecommendations(Definitions):

                is_extraction_and_refill = (
                    "cavity_extract_and_refill"
-                    in self.property.non_invasive_recommendations
+                    in measures
                )

                cost_result = self.costs.cavity_wall_insulation(
@ -404,11 +418,28 @@ class WallRecommendations(Definitions):

        simulation_config = {}
        if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
+            if wall_ending_config["is_cavity_wall"]:
+                efficiency_data = [
+                    x for x in cavity_wall_energy_eff if
+                    x["construction-age-band"] == self.property.construction_age_band
+                ][0]
+            elif wall_ending_config["internal_insulation"]:
+                efficiency_data = [
+                    x for x in iwi_energy_eff if
+                    x["construction-age-band"] == self.property.construction_age_band
+                ][0]
+            else:
+                efficiency_data = [
+                    x for x in ewi_energy_eff if
+                    x["construction-age-band"] == self.property.construction_age_band
+                ][0]
+
            simulation_config = {
-                "walls_energy_eff_ending": "Good"
+                "walls_energy_eff_ending": efficiency_data["walls-energy-eff"]
            }

        # We check if we have double insulation in any instances
+        # TODO: We should pull the energy efficiency categories on double insulation instances, though it's quite rare
        double_insulation = (
            (wall_ending_config["is_filled_cavity"] and wall_ending_config["external_insulation"]) or
            (wall_ending_config["is_filled_cavity"] and wall_ending_config["internal_insulation"]) or
@ -423,6 +454,16 @@ class WallRecommendations(Definitions):

        lowest_selected_u_value = None
        recommendations = []
+
+        iwi_non_invasive_recommendations = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "internal_wall_insulation"), {}
+        )
+        ewi_non_invasive_recommendations = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "external_wall_insulation"), {}
+        )
+        if ewi_non_invasive_recommendations:
+            raise NotImplementedError("Implement ewi non-invasive recommendations")
+
        for _, insulation_material_group in insulation_materials.groupby("description"):

            for _, material in insulation_material_group.iterrows():
@ -455,6 +496,15 @@ class WallRecommendations(Definitions):
                    )

                    if material["type"] == "internal_wall_insulation":
+
+                        if iwi_non_invasive_recommendations.get("cost") is not None:
+                            raise NotImplementedError(
+                                "Not handled passing costs from non-invasive recommendations for iwi"
+                            )
+
+                        sap_points = iwi_non_invasive_recommendations.get("sap_points", None)
+                        survey = iwi_non_invasive_recommendations.get("survey", False)
+
                        cost_result = self.costs.internal_wall_insulation(
                            wall_area=self.property.insulation_wall_area,
                            material=material.to_dict(),
@ -472,6 +522,10 @@ class WallRecommendations(Definitions):
                        )

                    elif material["type"] == "external_wall_insulation":
+
+                        sap_points = ewi_non_invasive_recommendations.get("sap_points", None)
+                        survey = ewi_non_invasive_recommendations.get("survey", False)
+
                        cost_result = self.costs.external_wall_insulation(
                            wall_area=self.property.insulation_wall_area,
                            material=material.to_dict(),
@ -522,19 +576,20 @@ class WallRecommendations(Definitions):
                            "starting_u_value": u_value,
                            "new_u_value": new_u_value,
                            "already_installed": already_installed,
-                            "sap_points": None,
+                            "sap_points": sap_points,
                            "simulation_config": simulation_config,
                            "description_simulation": {
                                "walls-description": new_description,
                                "walls-energy-eff": simulation_config["walls_energy_eff_ending"]
                            },
-                            **cost_result
+                            **cost_result,
+                            "survey": survey
                        }
                    )

        return recommendations

-    def find_insulation(self, u_value, phase):
+    def find_insulation(self, u_value, phase, measures):
        """
        This function contains the logic for finding potential insulation measures for a property, depending
        on the parts available and whether the property can have external wall insulation installed
@ -547,7 +602,7 @@ class WallRecommendations(Definitions):
        # consider diminishing returns between the two as they are considered to be separate measures

        ewi_recommendations = []
-        if self.ewi_valid():
+        if self.ewi_valid() and "external_wall_insulation" in measures:
            ewi_recommendations = self._find_insulation(
                u_value=u_value,
                insulation_materials=pd.DataFrame(
@ -557,12 +612,14 @@ class WallRecommendations(Definitions):
                phase=phase,
            )

-        iwi_recommendations = self._find_insulation(
-            u_value=u_value,
-            insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
-            non_insulation_materials=self.internal_wall_non_insulation_materials,
-            phase=phase,
-        )
+        iwi_recommendations = []
+        if "internal_wall_insulation" in measures:
+            iwi_recommendations = self._find_insulation(
+                u_value=u_value,
+                insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
+                non_insulation_materials=self.internal_wall_non_insulation_materials,
+                phase=phase,
+            )

        self.recommendations += ewi_recommendations + iwi_recommendations

--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@ -3,8 +3,9 @@ from typing import List
 import numpy as np

 from backend.Property import Property
+from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes
 from recommendations.Costs import Costs
-from recommendations.recommendation_utils import override_costs
+from recommendations.recommendation_utils import override_costs, check_simulation_difference


 class WindowsRecommendations:
@ -48,6 +49,7 @@ class WindowsRecommendations:
        is_secondary_glazing = self.property.restricted_measures or (
            self.property.windows["glazing_type"] == "secondary"
        )
+        windows_area = self.property.windows_area

        if not number_of_windows:
            raise ValueError("Number of windows not specified")
@ -57,6 +59,9 @@ class WindowsRecommendations:
        ):
            return

+        if windows_area is not None:
+            raise Exception("We have windows area, we should use this data for our recommendations!!!")
+
        # We scale the number of windows based on the proportion of existing glazing
        if self.property.data["multi-glaze-proportion"] != "":
            n_windows_scalar = 1 - (
@ -124,3 +129,64 @@ class WindowsRecommendations:
                }
            }
        ]
+
+    def recommend_mixed_glazing(self, phase):
+        """
+        Recommend mixed glazing for the property. This is more specific than the general windows recommendation and
+        is almost certain to arise from a survey, so it is built from the "mixed_glazing" non-invasive entry.
+        :return: None when there is no such entry, otherwise a one-item list with the recommendation for the phase
+        """
+
+        mixed_glazing_recommendation_config = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "mixed_glazing"), {}
+        )
+        if not mixed_glazing_recommendation_config:
+            return  # No mixed glazing entry supplied, so nothing to recommend
+
+        description = (
+            "Install a combination of secondary and double glazing to single glazed windows" if
+            not mixed_glazing_recommendation_config.get("description")
+            else mixed_glazing_recommendation_config["description"]
+        )
+
+        windows_ending_config = WindowAttributes("Full secondary glazing").process()
+
+        windows_simulation_config = check_simulation_difference(
+            new_config=windows_ending_config, old_config=self.property.windows, prefix="windows_"
+        )
+
+        windows_simulation_config = {
+            **windows_simulation_config,  # Keys set below take precedence over these
+            "windows_energy_eff_ending": "Average",
+            "glazed_type_ending": "secondary glazing",
+            "multi_glaze_proportion_ending": 100,
+        }
+
+        return [
+            {
+                "phase": phase,
+                "parts": [],
+                "type": "mixed_glazing",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "already_installed": False,
+                "sap_points": mixed_glazing_recommendation_config["sap_points"],  # The entry must supply this
+                "heat_demand": None,  # We will predict this
+                "kwh_savings": None,  # We will predict this
+                "co2_equivalent_savings": None,  # We will predict this
+                "energy_cost_savings": None,  # We will predict this
+                "total": mixed_glazing_recommendation_config["cost"],  # The entry must supply "cost"
+                # Labour defaults to a rough estimate of one 8 hour day when the entry gives no figures
+                "labour_hours": mixed_glazing_recommendation_config.get("labour_hours", 8),
+                "labour_days": mixed_glazing_recommendation_config.get("labour_days", 1),  # Assume 8 hour day
+                "survey": mixed_glazing_recommendation_config["survey"],  # The entry must supply this
+                "simulation_config": windows_simulation_config,
+                "description_simulation": {
+                    "multi-glaze-proportion": 100,
+                    "windows-energy-eff": "Average",
+                    "windows-description": "Multiple glazing throughout",
+                    "glazed-type": "secondary glazing",
+                },
+            }
+        ]
--- a/recommendations/county_to_region.py
+++ b/recommendations/county_to_region.py
@ -161,6 +161,9 @@ county_to_region_map = {

    # Additional mappings requried, based on what we find in the EPC database
    'Greater London Authority': 'Inner London',
+    'Herefordshire, County of': 'West Midlands',
+    "North Northamptonshire": 'East Midlands',
+    "West Northamptonshire": 'East Midlands',
    # We have a bunch of inner London local authority mappings, which can be used if the county is not found
    'Barking and Dagenham': 'Inner London', 'Barnet': 'Inner London', 'Bexley': 'Inner London',
    'Brent': 'Inner London', 'Bromley': 'Inner London', 'Camden': 'Inner London', 'City of London': 'Inner London',
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@ -9,7 +9,7 @@ def prepare_input_measures(property_recommendations, goal):
    """

    goal_map = {
-        "Increase EPC": "sap_points"
+        "Increasing EPC": "sap_points"
    }

    goal_key = goal_map[goal]
--- a/recommendations/rdsap_tables.py
+++ b/recommendations/rdsap_tables.py
@ -514,8 +514,8 @@ FLOOR_LEVEL_MAP = {
    "top floor": 5,
    "20+": 20,
    "21st or above": 21,
-    **{str(i).zfill(2): i for i in range(0, 21)},
-    **{ordinal(i): i for i in range(-1, 21)},
-    **{str(i): i for i in range(-1, 21)},
-    **{i: i for i in range(-1, 21)},
+    **{str(i).zfill(2): i for i in range(0, 51)},
+    **{ordinal(i): i for i in range(-1, 51)},
+    **{str(i): i for i in range(-1, 51)},
+    **{i: i for i in range(-1, 51)},
 }
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@ -205,10 +205,22 @@ def get_wall_u_value(
    return float(mapped_value)


-def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched):
+def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched, is_at_rafters):
    """Get the U-value from table S9 based on the insulation thickness."""
+
+    # If the roof is pitched & insulated at the rafters, we treat it as a roof room
+    if is_roof_room or is_at_rafters:
+        # We re-map the thickness
+        thickness_map = {
+            "below average": "50",
+            "average": "100",
+            "above average": "270",
+            "none": "0",
+        }
+        thickness = thickness_map[thickness]
+
    if thickness in ["below average", "average", "above average", "none", None] or (
-        not is_loft and not is_roof_room
+        not is_loft and not is_roof_room and not is_at_rafters
    ):
        return None
    elif thickness.endswith("+"):
@ -280,6 +292,7 @@ def get_roof_u_value(
        is_loft=is_loft,
        is_roof_room=is_roof_room,
        is_thatched=is_thatched,
+        is_at_rafters=is_at_rafters
    )

    if u_value is not None:
@ -676,7 +689,7 @@ def estimate_windows(
    property_type, built_form, construction_age_band, floor_area, number_habitable_rooms
 ):
    # If there is an extension, that will boost the number of habitable rooms
-    
+
    # Base window count based on habitable rooms
    window_count = number_habitable_rooms

--- a/recommendations/tests/test_data/heating_recommendations_data.py
+++ b/recommendations/tests/test_data/heating_recommendations_data.py
@ -0,0 +1,391 @@
+testing_examples = [
+    {
+        "epc": {
+            'lmk-key': '948324269042014090409224502942098', 'address1': '15, Ringwood Crescent', 'address2': None,
+            'address3': None, 'postcode': 'TS19 9DN', 'building-reference-number': 1016769078,
+            'current-energy-rating': 'C', 'potential-energy-rating': 'B', 'current-energy-efficiency': 79,
+            'potential-energy-efficiency': 85, 'property-type': 'House', 'built-form': 'Semi-Detached',
+            'inspection-date': '2014-08-21', 'local-authority': 'E06000004', 'constituency': 'E14000970',
+            'county': None,
+            'lodgement-date': '2014-09-04', 'transaction-type': 'none of the above', 'environment-impact-current': 77,
+            'environment-impact-potential': 85, 'energy-consumption-current': 152,
+            'energy-consumption-potential': 103.0, 'co2-emissions-current': 2.2, 'co2-emiss-curr-per-floor-area': 30,
+            'co2-emissions-potential': 1.5, 'lighting-cost-current': 61.0, 'lighting-cost-potential': 47.0,
+            'heating-cost-current': 625.0, 'heating-cost-potential': 522.0, 'hot-water-cost-current': 100.0,
+            'hot-water-cost-potential': 71.0, 'total-floor-area': 74.0, 'energy-tariff': 'Single',
+            'mains-gas-flag': 'Y', 'floor-level': 'NODATA!', 'flat-top-storey': None, 'flat-storey-count': None,
+            'main-heating-controls': 2106.0, 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
+            'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 70.0,
+            'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system', 'hot-water-energy-eff': 'Good',
+            'hot-water-env-eff': 'Good', 'floor-description': 'Solid, no insulation (assumed)',
+            'floor-energy-eff': None,
+            'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average',
+            'windows-env-eff': 'Average', 'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Good',
+            'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, mains gas', 'sheating-energy-eff': None,
+            'sheating-env-eff': None, 'roof-description': 'Pitched, 50 mm loft insulation', 'roof-energy-eff': 'Poor',
+            'roof-env-eff': 'Poor', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'mainheat-energy-eff': 'Good', 'mainheat-env-eff': 'Good',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'mainheatc-energy-eff': 'Good',
+            'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 70% of fixed outlets',
+            'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
+            'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0, 'heat-loss-corridor': 'NO DATA!',
+            'unheated-corridor-length': None, 'floor-height': 2.5, 'photo-supply': 50.0,
+            'solar-water-heating-flag': None,
+            'mechanical-ventilation': 'natural', 'address': '15, Ringwood Crescent',
+            'local-authority-label': 'Stockton-on-Tees', 'constituency-label': 'Stockton North',
+            'posttown': 'STOCKTON-ON-TEES', 'construction-age-band': 'England and Wales: 1950-1966',
+            'lodgement-datetime': '2014-09-04 09:22:45', 'tenure': 'owner-occupied',
+            'fixed-lighting-outlets-count': 10.0, 'low-energy-fixed-light-count': 7.0, 'uprn': 100110195416.0,
+            'uprn-source': 'Address Matched'
+        },
+        "heating_recommendation_descriptions": [
+            "Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and "
+            "smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade "
+            "scheme grant",
+        ],
+        "heating_controls_recommendation_descriptions": [
+            "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
+            "temperature zone control)"
+        ],
+        "notes": "This property has a boiler, radiators & mains gas with good efficiency so the only recommendation "
+                 "we expect here is for an air source heat pump. The heating controls are a programmer, room thermostat "
+                 "and TRVs and so we should expect a TTZC recommendation"
+    },
+    {
+        "epc": {
+            'lmk-key': '153995620832008100717310934068296', 'address1': 'Apartment 13 The Quays',
+            'address2': 'Burscough', 'address3': None, 'postcode': 'L40 5TW',
+            'building-reference-number': 2604281568, 'current-energy-rating': 'C', 'potential-energy-rating': 'B',
+            'current-energy-efficiency': 69, 'potential-energy-efficiency': 84, 'property-type': 'Flat',
+            'built-form': 'Detached', 'inspection-date': '2008-10-06', 'local-authority': 'E07000127',
+            'constituency': 'E14001033', 'county': 'Lancashire', 'lodgement-date': '2008-10-07',
+            'transaction-type': 'marketed sale', 'environment-impact-current': 78,
+            'environment-impact-potential': 78, 'energy-consumption-current': 195,
+            'energy-consumption-potential': 192.0, 'co2-emissions-current': 1.7,
+            'co2-emiss-curr-per-floor-area': 29, 'co2-emissions-potential': 1.7, 'lighting-cost-current': 35,
+            'lighting-cost-potential': 38, 'heating-cost-current': 108, 'heating-cost-potential': 89,
+            'hot-water-cost-current': 256, 'hot-water-cost-potential': 104, 'total-floor-area': 57.2,
+            'energy-tariff': 'Single', 'mains-gas-flag': 'N', 'floor-level': '1st', 'flat-top-storey': 'Y',
+            'flat-storey-count': 2.0, 'main-heating-controls': 2603.0, 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
+            'extension-count': 0.0, 'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0,
+            'low-energy-lighting': 77.0, 'number-open-fireplaces': 0.0,
+            'hotwater-description': 'Electric immersion, standard tariff', 'hot-water-energy-eff': 'Very Poor',
+            'hot-water-env-eff': 'Poor', 'floor-description': '(other premises below)', 'floor-energy-eff': None,
+            'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good',
+            'windows-env-eff': 'Good', 'walls-description': 'Cavity wall, as built, insulated (assumed)',
+            'walls-energy-eff': 'Good', 'walls-env-eff': 'Good',
+            'secondheat-description': 'Portable electric heaters', 'sheating-energy-eff': None,
+            'sheating-env-eff': None, 'roof-description': '(another dwelling above)', 'roof-energy-eff': None,
+            'roof-env-eff': None, 'mainheat-description': 'Room heaters, electric',
+            'mainheat-energy-eff': 'Very Poor', 'mainheat-env-eff': 'Poor',
+            'mainheatcont-description': 'Programmer and appliance thermostats', 'mainheatc-energy-eff': 'Good',
+            'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 77% of fixed outlets',
+            'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
+            'main-fuel': 'electricity - this is for backwards compatibility only and should not be used',
+            'wind-turbine-count': 0.0, 'heat-loss-corridor': 'heated corridor', 'unheated-corridor-length': None,
+            'floor-height': 2.3, 'photo-supply': 0.0, 'solar-water-heating-flag': 'N',
+            'mechanical-ventilation': 'natural', 'address': 'Apartment 13 The Quays, Burscough',
+            'local-authority-label': 'West Lancashire', 'constituency-label': 'West Lancashire',
+            'posttown': 'ORMSKIRK', 'construction-age-band': 'England and Wales: 2003-2006',
+            'lodgement-datetime': '2008-10-07 17:31:09', 'tenure': 'owner-occupied',
+            'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None, 'uprn': 10012342725.0,
+            'uprn-source': 'Address Matched',
+        },
+        "heating_recommendation_descriptions": [
+            "Install high heat retention electric storage heaters and upgrade heating controls to High Heat Retention "
+            "Storage Heater Controls"
+        ],
+        "heating_controls_recommendation_descriptions": [],
+        "notes": "This property has electric room heaters and is off gas so a boiler recommendation is not appropriate. "
+                 "We would expect a high heat retention storage recommendation. The property is a flat and therefore "
+                 "we don't expect an air source heat pump recommendation. We also wouldn't expect a specific heating "
+                 "control recommendation here"
+    },
+    {
+        "epc": {
+            'lmk-key': '751851300152012022010205497220090', 'address1': '21, Fullers Close', 'address2': 'Kelvedon',
+            'address3': None, 'postcode': 'CO5 9JX', 'building-reference-number': 8075968, 'current-energy-rating': 'D',
+            'potential-energy-rating': 'D', 'current-energy-efficiency': 55, 'potential-energy-efficiency': 56,
+            'property-type': 'Bungalow', 'built-form': 'Detached', 'inspection-date': '2012-02-20',
+            'local-authority': 'E07000067', 'constituency': 'E14001045', 'county': 'Essex',
+            'lodgement-date': '2012-02-20',
+            'transaction-type': 'non marketed sale', 'environment-impact-current': 39,
+            'environment-impact-potential': 39,
+            'energy-consumption-current': 475, 'energy-consumption-potential': 472.0, 'co2-emissions-current': 5.4,
+            'co2-emiss-curr-per-floor-area': 84, 'co2-emissions-potential': 5.4, 'lighting-cost-current': 53.0,
+            'lighting-cost-potential': 40.0, 'heating-cost-current': 674.0, 'heating-cost-potential': 678.0,
+            'hot-water-cost-current': 110.0, 'hot-water-cost-potential': 110.0, 'total-floor-area': 64.45,
+            'energy-tariff': 'dual', 'mains-gas-flag': 'N', 'floor-level': 'NODATA!', 'flat-top-storey': None,
+            'flat-storey-count': None, 'main-heating-controls': '2402', 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
+            'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 67.0,
+            'number-open-fireplaces': 0.0, 'hotwater-description': 'Electric immersion, off-peak',
+            'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Very Poor',
+            'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
+            'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
+            'walls-description': 'Cavity wall, as built, insulated (assumed)', 'walls-energy-eff': 'Good',
+            'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, electric', 'sheating-energy-eff': None,
+            'sheating-env-eff': None, 'roof-description': 'Pitched, 300+ mm loft insulation',
+            'roof-energy-eff': 'Very Good',
+            'roof-env-eff': 'Very Good', 'mainheat-description': 'Electric storage heaters',
+            'mainheat-energy-eff': 'Poor',
+            'mainheat-env-eff': 'Very Poor', 'mainheatcont-description': 'Automatic charge control',
+            'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
+            'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'lighting-energy-eff': 'Good',
+            'lighting-env-eff': 'Good', 'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0,
+            'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.38,
+            'photo-supply': 0.0,
+            'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural',
+            'address': '21, Fullers Close, Kelvedon',
+            'local-authority-label': 'Braintree', 'constituency-label': 'Witham', 'posttown': 'COLCHESTER',
+            'construction-age-band': 'England and Wales: 1983-1990', 'lodgement-datetime': '2012-02-20 10:20:54',
+            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 6.0, 'low-energy-fixed-light-count': 4.0,
+            'uprn': 100090311351.0, 'uprn-source': 'Address Matched', 'property-type_y': None, 'built-form_y': None,
+        },
+        "heating_recommendation_descriptions": [],
+        "heating_controls_recommendation_descriptions": [],
+        "notes": "This test has electric storage heaters with automatic charge control - this case should be researched "
+                 "and checked that a high heat retention storage recommendation is actually sensible. If it's not, "
+                 "we should adjust accordingly or perhaps have just a control recommendation"
+    },
+    {
+        "epc": {
+            'lmk-key': '1356416458532015082116515621278108', 'address1': '19a, St. Stephens Road', 'address2': None,
+            'address3': None, 'postcode': 'TW3 2BH', 'building-reference-number': 5821158378,
+            'current-energy-rating': 'E', 'potential-energy-rating': 'C', 'current-energy-efficiency': 54,
+            'potential-energy-efficiency': 76, 'property-type': 'Maisonette', 'built-form': 'Semi-Detached',
+            'inspection-date': '2015-08-21', 'local-authority': 'E09000018', 'constituency': 'E14000593',
+            'county': 'Greater London Authority', 'lodgement-date': '2015-08-21', 'transaction-type': 'marketed sale',
+            'environment-impact-current': 48, 'environment-impact-potential': 78, 'energy-consumption-current': 383,
+            'energy-consumption-potential': 155, 'co2-emissions-current': 3.4, 'co2-emiss-curr-per-floor-area': 68,
+            'co2-emissions-potential': 1.4, 'lighting-cost-current': 52, 'lighting-cost-potential': 34,
+            'heating-cost-current': 560, 'heating-cost-potential': 255, 'hot-water-cost-current': 166,
+            'hot-water-cost-potential': 102, 'total-floor-area': 51.0, 'energy-tariff': 'Single', 'mains-gas-flag': 'Y',
+            'floor-level': '1st', 'flat-top-storey': 'Y', 'flat-storey-count': None, 'main-heating-controls': '2104',
+            'multi-glaze-proportion': 100.0, 'glazed-type': 'double glazing, unknown install date',
+            'glazed-area': 'Normal', 'extension-count': 0.0, 'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0,
+            'low-energy-lighting': 50.0, 'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system',
+            'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Average',
+            'floor-description': '(another dwelling below)', 'floor-energy-eff': 'NO DATA!', 'floor-env-eff': None,
+            'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)', 'walls-energy-eff': 'Very Poor',
+            'walls-env-eff': 'Very Poor', 'secondheat-description': 'Room heaters, mains gas',
+            'sheating-energy-eff': None, 'sheating-env-eff': None,
+            'roof-description': 'Pitched, 100 mm loft insulation',
+            'roof-energy-eff': 'Average', 'roof-env-eff': 'Average',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Good',
+            'mainheat-env-eff': 'Good', 'mainheatcont-description': 'Programmer and room thermostat',
+            'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
+            'lighting-description': 'Low energy lighting in 50% of fixed outlets', 'lighting-energy-eff': 'Good',
+            'lighting-env-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0,
+            'heat-loss-corridor': 'no corridor', 'unheated-corridor-length': None, 'floor-height': 2.5,
+            'photo-supply': None, 'solar-water-heating-flag': 'N', 'mechanical-ventilation': 'natural',
+            'address': '19a, St. Stephens Road', 'local-authority-label': 'Hounslow',
+            'constituency-label': 'Brentford and Isleworth', 'posttown': 'HOUNSLOW',
+            'construction-age-band': 'England and Wales: 1930-1949', 'lodgement-datetime': '2015-08-21 16:51:56',
+            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None,
+            'uprn': 100021560521.0, 'uprn-source': 'Address Matched',
+        },
+        "heating_recommendation_descriptions": [],
+        "heating_controls_recommendation_descriptions": [],
+        "notes": ""
+    },
+    {
+        "epc": {
+            'lmk-key': '1164410099442014062611405027442168', 'address1': '31, Brightside Road', 'address2': None,
+            'address3': None, 'postcode': 'SE13 6EP', 'building-reference-number': 5481394278,
+            'current-energy-rating': 'E', 'potential-energy-rating': 'C', 'current-energy-efficiency': 48,
+            'potential-energy-efficiency': 79, 'property-type': 'House', 'built-form': 'Mid-Terrace',
+            'inspection-date': '2014-06-26', 'local-authority': 'E09000023', 'constituency': 'E14000789',
+            'county': 'Greater London Authority', 'lodgement-date': '2014-06-26',
+            'transaction-type': 'assessment for green deal', 'environment-impact-current': 44,
+            'environment-impact-potential': 77, 'energy-consumption-current': 334,
+            'energy-consumption-potential': 121.0, 'co2-emissions-current': 5.1, 'co2-emiss-curr-per-floor-area': 64,
+            'co2-emissions-potential': 1.9, 'lighting-cost-current': 70.0, 'lighting-cost-potential': 49.0,
+            'heating-cost-current': 964.0, 'heating-cost-potential': 571.0, 'hot-water-cost-current': 107.0,
+            'hot-water-cost-potential': 72.0, 'total-floor-area': 80.0, 'energy-tariff': 'Single',
+            'mains-gas-flag': 'Y', 'floor-level': 'NODATA!', 'flat-top-storey': None, 'flat-storey-count': None,
+            'main-heating-controls': '2102', 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 1.0,
+            'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 56.0,
+            'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system', 'hot-water-energy-eff': 'Good',
+            'hot-water-env-eff': 'Good', 'floor-description': 'Suspended, no insulation (assumed)',
+            'floor-energy-eff': None, 'floor-env-eff': None, 'windows-description': 'Fully double glazed',
+            'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)', 'walls-energy-eff': 'Very Poor',
+            'walls-env-eff': 'Very Poor', 'secondheat-description': 'Room heaters, mains gas',
+            'sheating-energy-eff': None, 'sheating-env-eff': None,
+            'roof-description': 'Pitched, no insulation (assumed)',
+            'roof-energy-eff': 'Very Poor', 'roof-env-eff': 'Very Poor',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Good',
+            'mainheat-env-eff': 'Good', 'mainheatcont-description': 'Programmer, no room thermostat',
+            'mainheatc-energy-eff': 'Very Poor', 'mainheatc-env-eff': 'Very Poor',
+            'lighting-description': 'Low energy lighting in 56% of fixed outlets', 'lighting-energy-eff': 'Good',
+            'lighting-env-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0,
+            'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.5,
+            'photo-supply': 0.0,
+            'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural', 'address': '31, Brightside Road',
+            'local-authority-label': 'Lewisham', 'constituency-label': 'Lewisham, Deptford', 'posttown': 'LONDON',
+            'construction-age-band': 'England and Wales: before 1900', 'lodgement-datetime': '2014-06-26 11:40:50',
+            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 9.0, 'low-energy-fixed-light-count': 5.0,
+            'uprn': 100021936225.0, 'uprn-source': 'Address Matched',
+        },
+        "heating_recommendation_descriptions": [
+            'Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and '
+            'smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade '
+            'scheme grant',
+        ],
+        "heating_controls_recommendation_descriptions": [
+            'upgrade heating controls to Room thermostat, programmer and TRVs',
+            'Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & '
+            'temperature zone control)'
+        ],
+        "notes": "Because this property already has a boiler, we don't recommend HHR. We only have a "
+                 "heating recommendation for an air source heat pump. Because the heating controls are "
+                 "Programmer, no room thermostat, we have a programmer, room thermostat and trvs recommendation "
+                 "for heating controls and for TTZC."
+    },
+    {
+        "epc": {
+            'lmk-key': '1139584119102014052116014126342698', 'address1': '13, Starbuck Street', 'address2': 'Rudry',
+            'address3': None, 'postcode': 'CF83 3DP', 'building-reference-number': 2187913278,
+            'current-energy-rating': 'E', 'potential-energy-rating': 'D', 'current-energy-efficiency': 44,
+            'potential-energy-efficiency': 61, 'property-type': 'Flat', 'built-form': 'Semi-Detached',
+            'inspection-date': '2014-05-21', 'local-authority': 'W06000018', 'constituency': 'W07000076',
+            'county': None,
+            'lodgement-date': '2014-05-21', 'transaction-type': 'rental (private)', 'environment-impact-current': 49,
+            'environment-impact-potential': 64, 'energy-consumption-current': 343,
+            'energy-consumption-potential': 240.0, 'co2-emissions-current': 4.0, 'co2-emiss-curr-per-floor-area': 61,
+            'co2-emissions-potential': 2.8, 'lighting-cost-current': 49.0, 'lighting-cost-potential': 49.0,
+            'heating-cost-current': 752.0, 'heating-cost-potential': 429.0, 'hot-water-cost-current': 281.0,
+            'hot-water-cost-potential': 281.0, 'total-floor-area': 66.0, 'energy-tariff': 'Single',
+            'mains-gas-flag': 'N', 'floor-level': '1st', 'flat-top-storey': 'Y', 'flat-storey-count': None,
+            'main-heating-controls': 2602.0, 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
+            'extension-count': 0.0, 'number-habitable-rooms': 4.0, 'number-heated-rooms': 4.0,
+            'low-energy-lighting': 86.0, 'number-open-fireplaces': 0.0,
+            'hotwater-description': 'Electric immersion, standard tariff', 'hot-water-energy-eff': 'Very Poor',
+            'hot-water-env-eff': 'Very Poor', 'floor-description': '(other premises below)', 'floor-energy-eff': None,
+            'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good',
+            'windows-env-eff': 'Good', 'walls-description': 'Cavity wall, as built, no insulation (assumed)',
+            'walls-energy-eff': 'Poor', 'walls-env-eff': 'Poor', 'secondheat-description': 'None',
+            'sheating-energy-eff': None, 'sheating-env-eff': None,
+            'roof-description': 'Pitched, 200 mm loft insulation',
+            'roof-energy-eff': 'Good', 'roof-env-eff': 'Good', 'mainheat-description': 'Room heaters, electric',
+            'mainheat-energy-eff': 'Very Poor', 'mainheat-env-eff': 'Very Poor',
+            'mainheatcont-description': 'Appliance thermostats', 'mainheatc-energy-eff': 'Good',
+            'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 86% of fixed outlets',
+            'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
+            'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0, 'heat-loss-corridor': 'no corridor',
+            'unheated-corridor-length': None, 'floor-height': 2.5, 'photo-supply': 0.0,
+            'solar-water-heating-flag': None,
+            'mechanical-ventilation': 'natural', 'address': '13, Starbuck Street, Rudry',
+            'local-authority-label': 'Caerphilly', 'constituency-label': 'Caerphilly', 'posttown': 'CAERPHILLY',
+            'construction-age-band': 'England and Wales: 1950-1966', 'lodgement-datetime': '2014-05-21 16:01:41',
+            'tenure': 'rental (private)', 'fixed-lighting-outlets-count': 7.0, 'low-energy-fixed-light-count': 6.0,
+            'uprn': 43088770.0, 'uprn-source': 'Address Matched',
+        },
+        "heating_recommendation_descriptions": [
+            'Install high heat retention electric storage heaters and upgrade heating controls to High Heat Retention '
+            'Storage Heater Controls'
+        ],
+        "heating_controls_recommendation_descriptions": [],
+        "notes": "This property is a flat so we don't have an ASHP recommendation. It also doesn't have access to the "
+                 "mains and so it can't have a gas boiler. We don't expect any controls recommendations"
+    },
+    {
+        "epc": {
+            'lmk-key': '492646189022010060208143796198410', 'address1': '67, Ridgeway Road', 'address2': None,
+            'address3': None, 'postcode': 'HP5 2EW', 'building-reference-number': 1976846768,
+            'current-energy-rating': 'D', 'potential-energy-rating': 'D', 'current-energy-efficiency': 64,
+            'potential-energy-efficiency': 68, 'property-type': 'Bungalow', 'built-form': 'Detached',
+            'inspection-date': '2010-06-01', 'local-authority': 'E07000005', 'constituency': 'E14000631',
+            'county': 'Buckinghamshire', 'lodgement-date': '2010-06-02', 'transaction-type': 'marketed sale',
+            'environment-impact-current': 67, 'environment-impact-potential': 70, 'energy-consumption-current': 249,
+            'energy-consumption-potential': 231.0, 'co2-emissions-current': 3.5, 'co2-emiss-curr-per-floor-area': 35,
+            'co2-emissions-potential': 3.2, 'lighting-cost-current': 89.0, 'lighting-cost-potential': 51.0,
+            'heating-cost-current': 627.0, 'heating-cost-potential': 603.0, 'hot-water-cost-current': 105.0,
+            'hot-water-cost-potential': 105.0, 'total-floor-area': 76.0, 'energy-tariff': 'Single',
+            'mains-gas-flag': 'Y', 'floor-level': 'NO DATA!', 'flat-top-storey': None, 'flat-storey-count': None,
+            'main-heating-controls': 2104.0, 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
+            'extension-count': 0.0, 'number-habitable-rooms': 7.0, 'number-heated-rooms': 7.0,
+            'low-energy-lighting': 25.0, 'number-open-fireplaces': 1.0, 'hotwater-description': 'From main system',
+            'hot-water-energy-eff': 'Very Good', 'hot-water-env-eff': 'Very Good',
+            'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
+            'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good', 'windows-env-eff': 'Good',
+            'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Good', 'walls-env-eff': 'Good',
+            'secondheat-description': 'Room heaters, wood logs', 'sheating-energy-eff': None, 'sheating-env-eff': None,
+            'roof-description': 'Pitched, 150 mm loft insulation', 'roof-energy-eff': 'Good', 'roof-env-eff': 'Good',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Very Good',
+            'mainheat-env-eff': 'Very Good', 'mainheatcont-description': 'Programmer and room thermostat',
+            'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
+            'lighting-description': 'Low energy lighting in 25% of fixed outlets', 'lighting-energy-eff': 'Average',
+            'lighting-env-eff': 'Average',
+            'main-fuel': 'mains gas - this is for backwards compatibility only and should not be used',
+            'wind-turbine-count': 0.0, 'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None,
+            'floor-height': 2.4, 'photo-supply': 0.0, 'solar-water-heating-flag': 'N',
+            'mechanical-ventilation': 'natural', 'address': '67, Ridgeway Road', 'local-authority-label': 'Chiltern',
+            'constituency-label': 'Chesham and Amersham', 'posttown': 'CHESHAM',
+            'construction-age-band': 'England and Wales: 1930-1949', 'lodgement-datetime': '2010-06-02 08:14:37',
+            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None,
+            'uprn': 100080513604.0, 'uprn-source': 'Address Matched'
+        },
+        "heating_recommendation_descriptions": [
+            'Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and '
+            'smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade '
+            'scheme grant'
+        ],
+        "heating_controls_recommendation_descriptions": [
+            'upgrade heating controls to Room thermostat, programmer and TRVs',
+            'Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & '
+            'temperature zone control)'
+
+        ],
+        "notes": "This has a very efficient boiler and is a detached bungalow, but only has "
+                 "Programmer and room thermostat for heating controls so we'd expect an ASHP heating recommendation "
+                 "as the only option, and heating controls recommendations for programmer, room thermostats and trvs "
+                 "as well as ttzc"
+    }
+]
+
+
+def print_unused_example():
+    """Print one random local EPC record that no entry of ``testing_examples`` covers yet.
+
+    This can be used to get example data to build the test cases. It reads a local EPC
+    extract, so it must only run when this file is executed directly: the test-suite imports
+    ``testing_examples`` from this module, and an import must not depend on local data.
+    """
+    # Imported here so that importing ``testing_examples`` does not pull these modules in
+    import inspect
+    import random
+    from pathlib import Path
+    import pandas as pd
+
+    src_file_path = inspect.getfile(lambda: None)
+    epc_directory = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+    epc_directories = [entry for entry in epc_directory.iterdir() if entry.is_dir()]
+    directory = random.choice(epc_directories)
+    data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+    # Rename the columns to the same format as the api returns
+    data.columns = [c.replace("_", "-").lower() for c in data.columns]
+    data["floor-height"] = data["floor-height"].fillna(2.45)
+
+    # A record counts as used when these four fields all match an existing test case
+    keys = ["mainheat-description", "mainheat-energy-eff", "property-type", "built-form"]
+    used_examples = pd.DataFrame(
+        [{**{key: x["epc"][key] for key in keys}, "used": True} for x in testing_examples]
+    )
+    data = data.merge(used_examples, how="left", on=keys)
+    data = data[pd.isnull(data["used"])].drop(columns=["used"])
+
+    eg = data.sample(1).to_dict("records")[0]
+    for field in [*keys, "mainheatcont-description"]:
+        print(eg[field])
+
+
+if __name__ == "__main__":
+    print_unused_example()
--- a/recommendations/tests/test_heating_recommendations.py
+++ b/recommendations/tests/test_heating_recommendations.py
@ -0,0 +1,124 @@
+from datetime import datetime
+import pandas as pd
+import msgpack
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
+import pytest
+from backend.Property import Property
+from etl.epc.Record import EPCRecord
+from etl.bill_savings.KwhData import KwhData
+from recommendations.HeatingRecommender import HeatingRecommender
+from recommendations.tests.test_data.heating_recommendations_data import testing_examples
+
+
+class TestHeatingRecommendations:
+    """
+    Runs HeatingRecommender over every entry of ``testing_examples`` and compares the produced
+    heating and heating-control recommendation descriptions with the expected ones.
+
+    The fixtures read their reference data from the "retrofit-data-dev" S3 bucket.
+    """
+
+    @pytest.fixture
+    def cleaning_data(self):
+        """Cleaning dataset (parquet on S3) handed to EPCRecord."""
+        return read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev",
+            file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+    @pytest.fixture
+    def cleaned(self):
+        """Cleaned EPC data: raw bytes read from S3 and decoded with msgpack."""
+        packed = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev",
+        )
+        return msgpack.unpackb(packed, raw=False)
+
+    @pytest.fixture
+    def kwh_client(self):
+        """KwhData client whose retail price table is pinned to a single row dated today."""
+        client = KwhData(bucket="retrofit-data-dev", read_consumption_data=False)
+        # We fix this pricing table for these tests
+        client.retail_price_comparison = pd.DataFrame(
+            {
+                "Date": [datetime.today().strftime("%Y-%m-%d")],
+                'Average standard variable tariff (Large legacy suppliers)': [1],
+            }
+        )
+        client.retail_price_comparison["Date"] = pd.to_datetime(client.retail_price_comparison["Date"])
+        return client
+
+    @pytest.mark.parametrize("test_case", testing_examples)
+    def test_recommend(self, test_case, cleaning_data, cleaned, kwh_client):
+        """
+        For one example EPC, build the Property, run the heating recommender and check that the
+        recommendation descriptions it returns are exactly the expected ones.
+        :return:
+        """
+        epc = test_case["epc"]
+
+        # Two examples are deliberately failed as reminders that they still need work
+        if epc["uprn"] == 100090311351:
+            raise Exception(
+                "This test has electric storage heaters with automatic charge control - this case should be researched"
+                "and checked that a high heat retention storage recommendation is actually sensible. If it's not, "
+                "we should adjust accordingly or perhaps have just a control recommendation"
+            )
+
+        if epc["uprn"] == 100021560521:
+            raise Exception("Finish this test - could do so while on the train")
+
+        # EPCRecord receives a copy, so the shared test data is not handed over directly
+        epc_record = EPCRecord(
+            epc_records={"original_epc": epc.copy(), "full_sap_epc": {}, "old_data": []},
+            run_mode="newdata",
+            cleaning_data=cleaning_data,
+        )
+
+        no_assessment = {"condition": {}, "energy_assessment_is_newer": False}
+        p = Property(
+            id=0,
+            postcode=epc["postcode"],
+            address=epc["address"],
+            epc_record=epc_record,
+            energy_assessment=no_assessment,
+        )
+
+        # For these tests, this can be fixed
+        fixed_kwh = {"heating_kwh_predictions": 12000, "hotwater_kwh_predictions": 3000}
+        kwh_predictions = {
+            name: pd.DataFrame([{"id": p.uprn, "predictions": kwh}])
+            for name, kwh in fixed_kwh.items()
+        }
+
+        p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions)
+
+        recommender = HeatingRecommender(property_instance=p)
+        # Check they're empty
+        assert not recommender.heating_recommendations
+        assert not recommender.heating_control_recommendations
+
+        recommender.recommend(has_cavity_or_loft_recommendations=False)
+
+        heating = recommender.heating_recommendations
+        controls = recommender.heating_control_recommendations
+        expected_heating = test_case["heating_recommendation_descriptions"]
+        expected_controls = test_case["heating_controls_recommendation_descriptions"]
+
+        # Counts first, so duplicated descriptions cannot hide behind the set comparison
+        assert len(heating) == len(expected_heating)
+        assert len(controls) == len(expected_controls)
+
+        # Check the exact descriptions
+        assert {rec["description"] for rec in heating} == set(expected_heating)
+        assert {rec["description"] for rec in controls} == set(expected_controls)
--- a/recommendations/wall_energy_efficiency_values.py
+++ b/recommendations/wall_energy_efficiency_values.py
@ -0,0 +1,56 @@
+# One entry per construction age band, ordered by descending count.
+# NOTE(review): presumably the walls-energy-eff rating most often recorded for cavity-insulated walls
+# in each band, with "count" the number of supporting EPC records - confirm against the source analysis.
+cavity_wall_energy_eff = [
+    {'construction-age-band': age_band, 'walls-energy-eff': rating, 'count': count}
+    for age_band, rating, count in (
+        ('England and Wales: 1950-1966', 'Average', 605820),
+        ('England and Wales: 1967-1975', 'Average', 410998),
+        ('England and Wales: 1930-1949', 'Average', 263575),
+        ('England and Wales: 1976-1982', 'Good', 206654),
+        ('England and Wales: 1983-1990', 'Good', 106489),
+        ('England and Wales: 1900-1929', 'Average', 58399),
+        ('England and Wales: 1991-1995', 'Good', 58252),
+        ('England and Wales: 1996-2002', 'Good', 35141),
+        ('England and Wales: 2003-2006', 'Good', 7194),
+        ('England and Wales: 2007-2011', 'Good', 2639),
+        ('England and Wales: before 1900', 'Average', 2495),
+        ('England and Wales: 2012 onwards', 'Very Good', 1158),
+        ('England and Wales: 2007 onwards', 'Good', 357),
+        ('INVALID!', 'Very Good', 88),
+    )
+]
+
+# One entry per construction age band, ordered by descending count.
+# NOTE(review): "iwi" presumably stands for internal wall insulation, with "count" the number of EPC
+# records behind each rating - confirm against the source analysis.
+iwi_energy_eff = [
+    {'construction-age-band': age_band, 'walls-energy-eff': rating, 'count': count}
+    for age_band, rating, count in (
+        ('England and Wales: 1900-1929', 'Good', 22415),
+        ('England and Wales: before 1900', 'Good', 13422),
+        ('England and Wales: 1930-1949', 'Good', 6640),
+        ('England and Wales: 1950-1966', 'Good', 1391),
+        ('England and Wales: 1967-1975', 'Good', 663),
+        ('England and Wales: 2003-2006', 'Very Good', 516),
+        ('England and Wales: 2007-2011', 'Very Good', 463),
+        ('England and Wales: 2012 onwards', 'Very Good', 353),
+        ('England and Wales: 1996-2002', 'Good', 218),
+        ('England and Wales: 1983-1990', 'Very Good', 166),
+        ('England and Wales: 1976-1982', 'Very Good', 121),
+        ('England and Wales: 1991-1995', 'Good', 104),
+        ('England and Wales: 2007 onwards', 'Very Good', 74),
+        ('INVALID!', 'Very Good', 26),
+    )
+]
+
+# One entry per construction age band, ordered by descending count.
+# NOTE(review): "ewi" presumably stands for external wall insulation, with "count" the number of EPC
+# records behind each rating - confirm against the source analysis.
+ewi_energy_eff = [
+    {'construction-age-band': age_band, 'walls-energy-eff': rating, 'count': count}
+    for age_band, rating, count in (
+        ('England and Wales: 1900-1929', 'Good', 18427),
+        ('England and Wales: 1930-1949', 'Good', 17803),
+        ('England and Wales: 1950-1966', 'Good', 4306),
+        ('England and Wales: before 1900', 'Good', 2955),
+        ('England and Wales: 1967-1975', 'Good', 647),
+        ('England and Wales: 1976-1982', 'Very Good', 188),
+        ('England and Wales: 2007-2011', 'Very Good', 73),
+        ('England and Wales: 2003-2006', 'Very Good', 49),
+        ('England and Wales: 2012 onwards', 'Very Good', 37),
+        ('England and Wales: 1983-1990', 'Good', 31),
+        ('England and Wales: 1996-2002', 'Very Good', 21),
+        ('England and Wales: 1991-1995', 'Good', 14),
+        ('England and Wales: 2007 onwards', 'Very Good', 8),
+        ('INVALID!', 'Very Good', 4),
+    )
+]
--- a/utils/s3.py
+++ b/utils/s3.py
@ -229,6 +229,39 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True):
    return df


+def save_excel_to_s3(df, bucket_name, file_key):
+    """
+    Write a pandas DataFrame to an in-memory Excel workbook and upload it to S3.
+
+    :param df: DataFrame to save; must not be empty.
+    :param bucket_name: S3 bucket name.
+    :param file_key: S3 object key (path and file name); must end in ".xls" or ".xlsx".
+    :raises ValueError: If the DataFrame is empty or the key lacks an Excel extension.
+    """
+    # Validate the inputs before doing any work
+    if df.empty:
+        raise ValueError("The DataFrame is empty. Nothing to save to Excel.")
+
+    excel_suffixes = (".xls", ".xlsx")
+    if not file_key.endswith(excel_suffixes):
+        raise ValueError("The specified file key does not appear to be an Excel file.")
+
+    # Serialise the workbook into memory (without the index column), then rewind the buffer so
+    # the upload reads it from the first byte
+    buffer = BytesIO()
+    df.to_excel(buffer, index=False)
+    buffer.seek(0)
+
+    # Upload through a boto3 session-scoped S3 resource
+    s3_resource = boto3.session.Session().resource('s3')
+    s3_resource.Bucket(bucket_name).put_object(Body=buffer, Key=file_key)
+
+    logger.info(f"Excel file saved to S3 bucket '{bucket_name}' with key '{file_key}'")
+
+
 def read_csv_from_s3(bucket_name, filepath):
    s3 = boto3.client('s3')

@ -276,3 +309,86 @@ def list_files_in_s3_folder(bucket_name, folder_name):
    except Exception as e:
        logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
        return []
+
+
+def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name):
+    """
+    List all files and immediate subfolders in a given folder in an S3 bucket.
+
+    E.g. if we have a folder structure in S3 like this:
+    - folder1/
+        - file1.csv
+        - file2.csv
+        - subfolder1/
+            - file3.csv
+
+    Then calling list_files_and_subfolders_in_s3_folder(bucket_name='my-bucket', folder_name='folder1/')
+    would return ['folder1/file1.csv', 'folder1/file2.csv', 'folder1/subfolder1/'].
+
+    Namely, the nested files are not included in the list, only the immediate files and subfolders.
+    Files are returned first, followed by the subfolder prefixes. Every result page is read, so
+    folders holding more entries than a single list_objects_v2 response can carry are returned
+    in full.
+
+    :param bucket_name: The name of the S3 bucket.
+    :param folder_name: The folder name within the S3 bucket. A trailing '/' is added if missing.
+    :return: A list of file keys and subfolder prefixes in the specified S3 folder, or an empty
+        list if the listing fails (the error is logged rather than raised).
+    """
+
+    # For this function, folder_name should end with a forward slash
+    if not folder_name.endswith('/'):
+        folder_name += '/'
+
+    try:
+        s3 = boto3.client('s3')
+        # A single list_objects_v2 call is capped (1000 keys), so walk every page
+        paginator = s3.get_paginator('list_objects_v2')
+
+        files = []
+        subfolders = []
+        for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_name, Delimiter='/'):
+            # Skip the key that is exactly the folder prefix itself (the folder marker, if present)
+            files.extend(
+                content['Key'] for content in page.get('Contents', []) if content['Key'] != folder_name
+            )
+            # With a delimiter, S3 reports immediate subfolders as common prefixes
+            subfolders.extend(prefix['Prefix'] for prefix in page.get('CommonPrefixes', []))
+
+        return files + subfolders
+
+    except NoCredentialsError:
+        logger.error("Credentials not available.")
+        return []
+    except PartialCredentialsError:
+        logger.error("Incomplete credentials provided.")
+        return []
+    except Exception as e:
+        logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}')
+        return []
+
+
+def list_xmls_in_s3_folder(bucket_name, folder_name):
+    """
+    List all XML files in a given folder in an S3 bucket.
+
+    The prefix is listed without a delimiter, so keys nested at any depth below the folder are
+    included. Every result page is read, so folders holding more objects than a single
+    list_objects_v2 response can carry are searched in full.
+
+    :param bucket_name: The name of the S3 bucket.
+    :param folder_name: The folder name within the S3 bucket.
+    :return: A list of XML file keys (keys ending in '.xml') in the specified S3 folder, or an
+        empty list if nothing is found or the listing fails (the error is logged rather than raised).
+    """
+    try:
+        s3 = boto3.client('s3')
+        # A single list_objects_v2 call is capped (1000 keys), so walk every page
+        paginator = s3.get_paginator('list_objects_v2')
+
+        found_any_object = False
+        xml_files = []
+        for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_name):
+            contents = page.get('Contents', [])
+            found_any_object = found_any_object or bool(contents)
+            # Filter XML files
+            xml_files.extend(content['Key'] for content in contents if content['Key'].endswith('.xml'))
+
+        if not found_any_object:
+            logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
+            return []
+
+        return xml_files
+
+    except NoCredentialsError:
+        logger.error("Credentials not available.")
+        return []
+    except PartialCredentialsError:
+        logger.error("Incomplete credentials provided.")
+        return []
+    except Exception as e:
+        logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
+        return []