Merge pull request #314 from Hestia-Homes/main

Major update, adding in wip solar api, kwh models, changes to ml models removing potential columns, new cost models and new ecr repos for cost models
2026-06-08 11:17:27 +00:00 · 2024-07-05 12:18:16 +01:00 · 2024-07-05 12:18:16 +01:00 · 5b37c528c8
commit 5b37c528c8
parent e2271cbdae 9e78d8d160
76 changed files with 6828 additions and 765 deletions
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -4,6 +4,9 @@
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
  <component name="PythonCompatibilityInspectionAdvertiser">
    <option name="version" value="3" />
  </component>
--- a/backend/OrdnanceSurvey.py
+++ b/backend/OrdnanceSurvey.py
@ -38,7 +38,11 @@ class OrdnanceSuveyClient:
            raise ValueError("No results found - run get_places_api first")

        self.address_os = self.most_relevant_result["ADDRESS"]
-        self.postcode_os = self.most_relevant_result["POSTCODE"]
+
+        if "POSTCODE" in self.most_relevant_result:
+            self.postcode_os = self.most_relevant_result["POSTCODE"]
+        else:
+            self.postcode_os = self.most_relevant_result["POSTCODE_LOCATOR"]
        # We strip out the postcode from the address as this is already stored separately
        self.address_os = self.address_os.replace(self.postcode_os, "").strip()
        # Remove trailing comma
@ -49,7 +53,7 @@ class OrdnanceSuveyClient:
        self.postcode_os = self.postcode_os.upper()

    @lru_cache(maxsize=128)
-    def get_places_api(self):
+    def get_places_api(self, filter_by_postcode=False):
        """
        This method is tasked with getting the places api from the Ordnance Survey.
        """
@ -58,16 +62,35 @@ class OrdnanceSuveyClient:
            raise ValueError("Ordnance Survey API key not specified")

        encoded_address_query = urllib.parse.quote(self.full_address)
-        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
-               f"{self.api_key}")
+
+        url = (
+            f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&dataset=DPA,LPI&matchprecision=10"
+            f"&key={self.api_key}"
+        )
+
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
-            results = data['results']
+            res = data["results"]
+
+            if filter_by_postcode:
+                results = []
+                for r in res:
+                    if "DPA" in r:
+                        if r["DPA"]["POSTCODE"] == self.postcode:
+                            results.append(r)
+                    elif "LPI" in r:
+                        if r["LPI"]["POSTCODE_LOCATOR"] == self.postcode:
+                            results.append(r)
+                    else:
+                        raise ValueError("Could not find postcode in either DPA or LPI")
+            else:
+                results = res
+
            self.results = results

            # Extract some details about the best match
-            self.most_relevant_result = self.results[0]["DPA"]
+            self.most_relevant_result = self.results[0]["DPA"] if "DPA" in self.results[0] else self.results[0]["LPI"]

            self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
            self.set_places_address()
@ -94,11 +117,14 @@ class OrdnanceSuveyClient:
        value_map = {
            # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
            'RD': {},
-            'RD02': {'property_type': 'House', 'built_form': 'Detatched'},
-            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detatched'},
+            'RD02': {'property_type': 'House', 'built_form': 'Detached'},
+            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
            'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
            'RD06': {'property_type': 'Flat'},
        }
+        # Other classification codes are listed in the CLASSIFICATION_CODE description of the technical specification:
+        # https://osdatahub.os.uk/docs/places/technicalSpecification
+        # A lookup table CSV containing all of the codes can be downloaded

        mapped = value_map.get(classification_code, {})
        self.property_type = mapped.get("property_type", "")
--- a/backend/Property.py
+++ b/backend/Property.py
@ -18,6 +18,7 @@ from recommendations.recommendation_utils import (
    esimtate_pitched_roof_area,
    estimate_windows,
 )
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings

 ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
 DATA_BUCKET = os.environ.get(
@ -93,7 +94,10 @@ class Property:
            non_invasive_recommendations else []
        )
        # This is a list of measures that have been recommended for the property
-        self.measures = ast.literal_eval(measures) if measures else None
+        if isinstance(measures, list):
+            self.measures = measures
+        else:
+            self.measures = ast.literal_eval(measures) if measures else None

        self.uprn = epc_record.get("uprn")
        self.full_sap_epc = epc_record.get("full_sap_epc")
@ -159,6 +163,9 @@ class Property:
        self.current_energy_bill = None
        self.expected_energy_bill = None

+        self.heating_energy_source = None
+        self.hot_water_energy_source = None
+
        self.recommendations_scoring_data = []

        self.parse_kwargs(kwargs)
@ -200,11 +207,11 @@ class Property:
        # difference_record = self.epc_record - self.epc_record

        # TODO: change these lower and replace in the settings file
-        print(
-            "CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING"
-        )
+        # print(
+        #     "CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING"
+        # )
        fixed_data_col_names = MANDATORY_FIXED_FEATURES + LATEST_FIELD
-        print("NEED TO CHANGE THE DASH TO LOWER CASE")
+        # print("NEED TO CHANGE THE DASH TO LOWER CASE")
        fixed_data_col_names = [
            x.lower().replace("_", "-") for x in fixed_data_col_names
        ]
@ -582,6 +589,26 @@ class Property:
            floor_area_decile_thresholds=floor_area_decile_thresholds,
        )
        self.set_energy_source()
+        self.find_energy_sources()
+        self.set_current_energy_bill()
+
+    def set_current_energy_bill(self):
+        """
+        Given what we know about the property now, estimates the current energy consumption using the UCL paper
+        https://www.sciencedirect.com/science/article/pii/S0378778823002542
+        :return:
+        """
+        starting_heat_demand = (
+            float(self.data["energy-consumption-current"]) * self.floor_area
+        )
+
+        self.current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+            epc_energy_consumption=starting_heat_demand,
+            current_epc_rating=self.data["current-energy-rating"],
+            total_floor_area=self.floor_area
+        )
+
+        self.current_energy_bill = AnnualBillSavings.calculate_annual_bill(self.current_adjusted_energy)

    def set_spatial(self, spatial: pd.DataFrame):
        """
@ -844,8 +871,8 @@ class Property:
        # where a property is marked as being on the first floor
        if self.floor_level > 0:

-            # We check if there is another property below
-            if not self.floor["another_property_below"]:
+            # We check if there is another property below (for a non-sap assessment)
+            if not self.floor["another_property_below"] and self.floor["thermal_transmittance_unit"] is None:
                self.floor_level = 0
            return

@ -902,14 +929,13 @@ class Property:
        return component_data

    def set_adjusted_energy(
-        self, current_adjusted_energy, expected_adjusted_energy, current_energy_bill, expected_energy_bill
+        self, expected_adjusted_energy, expected_energy_bill
    ):
        """
        Stores these values for usage later
        """
-        self.current_adjusted_energy = current_adjusted_energy
+
        self.expected_adjusted_energy = expected_adjusted_energy
-        self.current_energy_bill = current_energy_bill
        self.expected_energy_bill = expected_energy_bill

    def set_windows_count(self):
@ -924,7 +950,6 @@ class Property:
            construction_age_band=self.construction_age_band,
            floor_area=self.floor_area,
            number_habitable_rooms=self.number_of_rooms,
-            extension_count=float(self.data["extension-count"]),
        )

    def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
@ -990,3 +1015,66 @@ class Property:

        # Set the energy source based on the conditions above
        self.energy_source = energy_source
+
+    def find_energy_sources(self):
+        # Based on the heating and the hot water
+        heating_fuel_mapping = {
+            'has_mains_gas': 'Natural Gas',
+            'has_electric': 'Electricity',
+            'has_oil': 'Oil',
+            'has_wood_logs': 'Wood Logs',
+            'has_coal': 'Coal',
+            'has_anthracite': 'Anthracite',
+            'has_smokeless_fuel': 'Smokeless Fuel',
+            'has_lpg': 'LPG',
+            'has_b30k': 'B30K Biofuel',
+            'has_air_source_heat_pump': 'Electricity',
+            'has_ground_source_heat_pump': 'Electricity',
+            'has_water_source_heat_pump': 'Electricity',
+            'has_electric_heat_pump': 'Electricity',
+            'has_solar_assisted_heat_pump': 'Electricity',
+            'has_exhaust_source_heat_pump': 'Electricity',
+            'has_community_heat_pump': 'Electricity',
+            'has_wood_pellets': 'Wood Pellets',
+            'has_community_scheme': 'Varied (Community Scheme)'
+        }
+
+        # Hot water
+        heater_type_to_fuel = {
+            'gas instantaneous': 'Natural Gas',
+            'electric heat pump': 'Electricity',
+            'electric immersion': 'Electricity',
+            'gas boiler': 'Natural Gas',
+            'oil boiler': 'Oil',
+            'electric instantaneous': 'Electricity',
+            'gas multipoint': 'Natural Gas',
+            'heat pump': 'Electricity',
+            'solid fuel boiler': 'Solid Fuel',
+            'solid fuel range cooker': 'Solid Fuel',
+            'room heaters': 'Varied'  # Could be any fuel, further specifics needed based on context
+        }
+
+        # Define a mapping from system types to general categories or modifications of fuel types
+        system_type_modification = {
+            'from main system': 'Main System',
+            'from secondary system': 'Secondary System',
+            'from second main heating system': 'Secondary System',
+            'community scheme': 'Community Scheme'
+        }
+
+        self.heating_energy_source = [
+            fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
+        ]
+        if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
+            raise Exception("Investigate em")
+
+        self.heating_energy_source = self.heating_energy_source[0]
+
+        if self.hotwater["heater_type"] is not None:
+            self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
+        else:
+            fuel = system_type_modification[self.hotwater["system_type"]]
+            if fuel == 'Main System':
+                self.hot_water_energy_source = self.heating_energy_source
+            else:
+                raise Exception("Investiage me")
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@ -11,6 +11,7 @@ from BaseUtility import Definitions
 from utils.logger import setup_logger
 from typing import List
 from fuzzywuzzy import process
+from backend.app.utils import sap_to_epc

 logger = setup_logger()

@ -190,15 +191,15 @@ class SearchEpc:
        self.property_type = property_type
        self.fast = fast

-    @classmethod
-    def get_house_number(cls, address: str) -> str | None:
+    @staticmethod
+    def get_house_number(address: str, postcode=None) -> str | None:
        """
        This method uses the usaddress library to parse an address and extract the primary house or flat number.
        """
-        try:

-            # Custom regex to catch a broad range of cases
-            pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
+        try:
+            # Updated regex to catch house numbers including alphanumeric ones
+            pattern = r'(?i)(?:flat|apartment)\s*(\d+\w*)|^\s*(\d+\w*)'
            match = re.search(pattern, address)
            if match:
                return next(g for g in match.groups() if g is not None)
@ -207,6 +208,11 @@ class SearchEpc:
            # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
            for part, type_ in parsed:
                if type_ == 'OccupancyIdentifier':
+                    if postcode is not None:
+                        if part == postcode.split(" ")[0]:
+                            continue
+                        if part == postcode.split(" ")[1]:
+                            continue
                    return part  # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
                    # number

@ -216,7 +222,7 @@ class SearchEpc:
                return address_number.replace(",", "")  # Remove any trailing commas

        except Exception as e:
-            print(f"Error parsing address: {e}")
+            raise Exception(f"Error parsing address: {e}")

        return None

@ -428,7 +434,8 @@ class SearchEpc:
        self, initial_postcode: str,
        lmks_to_drop: list[str] | None = None,
        built_form: str = "",
-        property_type: str = ""
+        property_type: str = "",
+        exclude_old: bool = False
    ):
        """
        Fetches and processes EPC data for a given initial postcode, applying successive trimming
@ -447,6 +454,7 @@ class SearchEpc:
        :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
        :param built_form: The 'built-form' value to be used for filtering the EPC data.
        :param property_type: The 'property-type' value to be used for filtering the EPC data.
+        :param exclude_old: Flag to exclude EPC data older than 10 years.
        :return:
        """

@ -474,9 +482,23 @@ class SearchEpc:
                if lmks_to_drop is not None:
                    epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]

+                try:
+                    epc_data['lodgement-datetime'] = pd.to_datetime(
+                        epc_data['lodgement-datetime'], format='%Y-%m-%d %H:%M:%S', errors='coerce'
+                    )
+                except Exception as e:
+                    logger.error("Problem formatting lodgement-datime, appling fallback: " + str(e))
+                    epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], errors='coerce')
+
+                if exclude_old:
+                    # Exclude EPC data older than 10 years
+                    epc_data = epc_data[
+                        epc_data["lodgement-datetime"] > (pd.Timestamp.now() - pd.DateOffset(years=10))
+                        ]
+
                if not epc_data.empty:
                    # Further processing of the EPC data
-                    epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], errors='coerce')
+
                    epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
                    epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
                    epc_data["numeric_house_number"] = epc_data["house_number"].apply(
@ -554,7 +576,7 @@ class SearchEpc:
        # If loop finishes without a valid response, raise an exception
        raise Exception("Unable to find postcode data after trimming - investigate me")

-    def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
+    def estimate_epc(self, property_type, built_form, lmks_to_drop=None, exclude_old=False):
        """
        For a property that does not have an EPC, we retrieve the EPC data for the closest properties
        and estimate the EPC for the property in question.
@ -567,6 +589,7 @@ class SearchEpc:
                                the ordnance survey api
        :param lmks_to_drop:    This is a list of LMK keys that should be dropped from the estimation process. This
                                is used as an override for testing, to drop EPCs for the property we are testing
+        :param exclude_old:     Used to drop any expired EPCs (more than 10 years old)
        :return:
        """

@ -576,7 +599,8 @@ class SearchEpc:
            initial_postcode=self.postcode,
            lmks_to_drop=lmks_to_drop,
            built_form=built_form,
-            property_type=property_type
+            property_type=property_type,
+            exclude_old=exclude_old
        )

        # If we have missing lodgment date, we fill it with inspection-date
@ -624,6 +648,8 @@ class SearchEpc:
        else:
            estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")

+        estimated_epc["current-energy-rating"] = sap_to_epc(estimated_epc["current-energy-efficiency"])
+
        estimated_epc["postcode"] = self.postcode
        estimated_epc["uprn"] = self.uprn
        estimated_epc["address"] = self.full_address
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@ -1,336 +1,333 @@
-from backend.Property import Property
-from backend.SearchEpc import SearchEpc
-from etl.epc.Record import EPCRecord
-from dotenv import load_dotenv
-from utils.s3 import read_dataframe_from_s3_parquet
-import os
+import pandas as pd
+import numpy as np
+from recommendations.Costs import MCS_SOLAR_PV_COST_DATA
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 import requests
+from functools import lru_cache
+import time

-load_dotenv(dotenv_path="backend/.env")
-EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

-# This is for 6 Laura Close, Tintagel, PL34 0EB (same property that Cotswolrd energy used)
-uprn = 100040099104
-# This is for 353A, Hermitage Lane, ME16 9NT (one of the e.on properties)
-uprn = 200000964454
+class GoogleSolarApi:
+    NORTH_FACING_AZIMUTH_RANGE = (-30, 30)

-cleaning_data = read_dataframe_from_s3_parquet(
-    bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
-)
+    # Conservative estimate of the proportion of electricity that will be consumed; the rest will
+    # be exported
+    SOLAR_CONSUMPTION_PROPORTION = 0.5

-searcher = SearchEpc(address1="", postcode="", uprn=uprn, auth_token=EPC_AUTH_TOKEN, os_api_key="")
+    # The variables below are described in the documentation for cost analysis for non-US locations:
+    # https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
+    # Unless noted otherwise, we use the default figures that the API uses for US locations

-searcher.find_property(skip_os=True)
+    # The factor by which the cost of electricity increases annually. The Solar API uses 1.022 (2.2% annual increase)
+    # for US locations.
+    cost_increase_factor = 1.022

-epc_records = {
-    'original_epc': searcher.newest_epc.copy(),
-    'full_sap_epc': searcher.full_sap_epc.copy(),
-    'old_data': searcher.older_epcs.copy(),
-}
+    # The efficiency at which an inverter converts the DC electricity that is produced by the solar panels to the AC
+    # electricity that is used in a household. The Solar API uses 85% for US locations. We use 0.955, which is the
+    # middle value of the 93-98% range, cited by Sunsave:
+    # https://www.sunsave.energy/solar-panels-advice/system-size/inverters
+    dc_to_ac_rate = 0.955

-epc = EPCRecord(
-    epc_records=epc_records,
-    run_mode="newdata",
-    cleaning_data=cleaning_data
-)
+    # The Solar API uses 1.04 (4% annual increase) for US locations
+    discount_rate = 1.04

-uprn_filenames = read_dataframe_from_s3_parquet(
-    bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
-)
+    # How much the efficiency of the solar panels declines each year. The Solar API uses 0.995 (0.5% annual decrease)
+    # for US locations
+    efficiency_depreciation_factor = 0.995

-p = Property(
-    id=0,
-    address=searcher.address_clean,
-    postcode=searcher.postcode_clean,
-    epc_record=epc,
-    already_installed={},
-    non_invasive_recommendations={},
-)
+    # The expected lifespan of the solar installation. The Solar API uses 20 years. Adjust this value as needed for
+    # your area
+    installation_life_span = 20

-p.get_spatial_data(uprn_filenames)
+    def __init__(self, api_key, max_retries=5):
+        """
+        Initialize the GoogleSolarApi class with the provided API key and maximum retries.

-longitude = p.spatial["longitude"]
-latitude = p.spatial["latitude"]
+        :param api_key: The API key to authenticate requests to the Google Solar API.
+        :param max_retries: The maximum number of retries for the API request (default is 5).
+        """
+        self.api_key = api_key
+        self.max_retries = max_retries
+        self.base_url = "https://solar.googleapis.com/v1"

-api_key = "AIzaSyCIz8Psu5h-1txuDX0rQpUTgkvdj8yohqU"
-url = 'https://solar.googleapis.com/v1/solarPotential'
-params = {
-    'location.latitude': f'{latitude:.5f}',
-    'location.longitude': f'{longitude:.5f}',
-    'requiredQuality': "MEDIUM",
-    'key': api_key
-}
+        self.insights_data = None
+        self.roof_segments = []

-insights_url = 'https://solar.googleapis.com/v1/buildingInsights:findClosest'
+        # property attributes:
+        self.floor_area = None
+        self.roof_area = None
+        self.roof_segment_indexes = None
+        self.panel_area = None
+        self.panel_wattage = None
+        self.panel_performance = None

-# Make the GET request to the Solar API
-insights_response = requests.get(insights_url, params=params)
-insights_data = insights_response.json()
+    def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None):
+        """
+        Make an API request to retrieve building insights based on the given longitude and latitude, with retry
+        mechanism.

-solar_potential = insights_data["solarPotential"]
+        :param longitude: The longitude of the location.
+        :param latitude: The latitude of the location.
+        :param required_quality: The required quality of the data (default is "MEDIUM").
+        :param max_retries: The maximum number of retries for the API request (default is None, which uses the
+        instance's max_retries).
+        :return: The JSON response containing the building insights data.
+        """
+        if max_retries is None:
+            max_retries = self.max_retries

-from pprint import pprint
+        insights_url = f"{self.base_url}/buildingInsights:findClosest"
+        params = {
+            'location.latitude': f'{latitude:.5f}',
+            'location.longitude': f'{longitude:.5f}',
+            'requiredQuality': required_quality,
+            'key': self.api_key
+        }

-pprint(solar_potential)
+        attempt = 0
+        while attempt < max_retries:
+            try:
+                response = requests.get(insights_url, params=params)
+                response.raise_for_status()  # Raise an error for bad status codes
+                return response.json()
+            except requests.exceptions.RequestException as e:
+                attempt += 1
+                print(f"Attempt {attempt} failed: {e}")
+                time.sleep(2 ** attempt)  # Exponential backoff
+                if attempt >= max_retries:
+                    raise

-# This is the size of the panels used in the calculation - 400 watt
-solar_potential["panelCapacityWatts"]
-# Height of the panels used
-solar_potential["panelHeightMeters"]
-# Width of the panels used
-solar_potential["panelWidthMeters"]
+    @lru_cache(maxsize=128)
+    def get(self, longitude, latitude, required_quality="MEDIUM"):
+        """
+        Wrapper function that calls get_building_insights and extracts roof segments, with caching.

-solar_potential["wholeRoofStats"]
+        :param longitude: The longitude of the location.
+        :param latitude: The latitude of the location.
+        :param required_quality: The required quality of the data (default is "MEDIUM").
+        :return: The JSON response containing the building insights data.
+        """

-# Copy of response for testing - 6 Laura Close, Tintagel, PL34 0EB
-# {'name': 'buildings/ChIJ2yC6t4KEa0gRh2TIssogI7k', 'center': {'latitude': 50.667375, 'longitude': -4.7416833},
-# 'imageryDate': {'year': 2021, 'month': 7, 'day': 19}, 'regionCode': 'GB', 'solarPotential': {'maxArrayPanelsCount':
-# 39, 'maxArrayAreaMeters2': 76.578636, 'maxSunshineHoursPerYear': 1172.0627, 'carbonOffsetFactorKgPerMwh':
-# 478.99942, 'wholeRoofStats': {'areaMeters2': 129.65686, 'sunshineQuantiles': [537, 738.3836, 805.62445, 842.6802,
-# 909.8431, 972.15234, 1036.1013, 1092.051, 1135.8192, 1163.1444, 1193.6012], 'groundAreaMeters2': 112.33},
-# 'roofSegmentStats': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'stats': {'areaMeters2': 44.08321,
-# 'sunshineQuantiles': [614, 940.86975, 982.39124, 1057.0664, 1109.6869, 1137.5837, 1152.9211, 1163.1106, 1168.2212,
-# 1170.8883, 1193.6012], 'groundAreaMeters2': 37.61}, 'center': {'latitude': 50.6673664, 'longitude':
-# -4.741714099999999}, 'boundingBox': {'sw': {'latitude': 50.6673354, 'longitude': -4.741777}, 'ne': {'latitude':
-# 50.6674029, 'longitude': -4.7416472}}, 'planeHeightAtCenterMeters': 93.0221}, {'pitchDegrees': 34.39779,
-# 'azimuthDegrees': 31.74401, 'stats': {'areaMeters2': 44.622986, 'sunshineQuantiles': [537, 671.49774, 733.84985,
-# 780.82733, 801.4026, 814.0189, 824.0077, 847.77484, 895.08295, 950.1469, 1123.3503], 'groundAreaMeters2': 36.82},
-# 'center': {'latitude': 50.6673966, 'longitude': -4.7416813}, 'boundingBox': {'sw': {'latitude': 50.667361,
-# 'longitude': -4.7417497}, 'ne': {'latitude': 50.6674303, 'longitude': -4.741615599999999}},
-# 'planeHeightAtCenterMeters': 92.87593}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'stats': {
-# 'areaMeters2': 17.074476, 'sunshineQuantiles': [644.71136, 731.0546, 782.89813, 842.7107, 908.55585, 966.6212,
-# 1010.6367, 1038.2543, 1053.2788, 1090.6831, 1128.0178], 'groundAreaMeters2': 17.050001}, 'center': {'latitude':
-# 50.66740850000001, 'longitude': -4.7416025}, 'boundingBox': {'sw': {'latitude': 50.6673895, 'longitude':
-# -4.7416436}, 'ne': {'latitude': 50.667431199999996, 'longitude': -4.7415572}}, 'planeHeightAtCenterMeters':
-# 90.630356}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'stats': {'areaMeters2': 13.501617,
-# 'sunshineQuantiles': [749, 976.85345, 1059.0062, 1081.6173, 1097.4441, 1110.3171, 1128.2186, 1133.9421, 1142.068,
-# 1148.2168, 1157.632], 'groundAreaMeters2': 12.02}, 'center': {'latitude': 50.667315699999996, 'longitude':
-# -4.741675400000001}, 'boundingBox': {'sw': {'latitude': 50.667291399999996, 'longitude': -4.7417066},
-# 'ne': {'latitude': 50.6673372, 'longitude': -4.741648400000001}}, 'planeHeightAtCenterMeters': 92.36334},
-# {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334, 'stats': {'areaMeters2': 10.374564, 'sunshineQuantiles': [
-# 617.9507, 752.2504, 847.66315, 872.0505, 881.26227, 900.9639, 933.3188, 967.4747, 1000.8129, 1038.3002, 1105.545],
-# 'groundAreaMeters2': 8.83}, 'center': {'latitude': 50.6673295, 'longitude': -4.7417128}, 'boundingBox': {'sw': {
-# 'latitude': 50.6673134, 'longitude': -4.7417422}, 'ne': {'latitude': 50.6673413, 'longitude': -4.7416775}},
-# 'planeHeightAtCenterMeters': 92.31146}], 'solarPanelConfigs': [{'panelsCount': 4, 'yearlyEnergyDcKwh': 1867.1516,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 4,
-# 'yearlyEnergyDcKwh': 1867.1515, 'segmentIndex': 0}]}, {'panelsCount': 5, 'yearlyEnergyDcKwh': 2335.0068,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 5,
-# 'yearlyEnergyDcKwh': 2335.0068, 'segmentIndex': 0}]}, {'panelsCount': 6, 'yearlyEnergyDcKwh': 2799.8508,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 6,
-# 'yearlyEnergyDcKwh': 2799.8508, 'segmentIndex': 0}]}, {'panelsCount': 7, 'yearlyEnergyDcKwh': 3264.6506,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 7,
-# 'yearlyEnergyDcKwh': 3264.6506, 'segmentIndex': 0}]}, {'panelsCount': 8, 'yearlyEnergyDcKwh': 3726.2405,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 8,
-# 'yearlyEnergyDcKwh': 3726.2405, 'segmentIndex': 0}]}, {'panelsCount': 9, 'yearlyEnergyDcKwh': 4187.721,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 9,
-# 'yearlyEnergyDcKwh': 4187.721, 'segmentIndex': 0}]}, {'panelsCount': 10, 'yearlyEnergyDcKwh': 4646.094,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 10,
-# 'yearlyEnergyDcKwh': 4646.094, 'segmentIndex': 0}]}, {'panelsCount': 11, 'yearlyEnergyDcKwh': 5103.777,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 10,
-# 'yearlyEnergyDcKwh': 4646.094, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 1, 'yearlyEnergyDcKwh': 457.68268, 'segmentIndex': 3}]}, {'panelsCount': 12, 'yearlyEnergyDcKwh':
-# 5559.845, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 10,
-# 'yearlyEnergyDcKwh': 4646.094, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 913.7509, 'segmentIndex': 3}]}, {'panelsCount': 13, 'yearlyEnergyDcKwh':
-# 6013.053, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 11,
-# 'yearlyEnergyDcKwh': 5099.302, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 913.7509, 'segmentIndex': 3}]}, {'panelsCount': 14, 'yearlyEnergyDcKwh':
-# 6461.664, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 12,
-# 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 913.7509, 'segmentIndex': 3}]}, {'panelsCount': 15, 'yearlyEnergyDcKwh':
-# 6902.33, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 12,
-# 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
-# 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 16, 'yearlyEnergyDcKwh':
-# 7321.6436, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 12,
-# 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099,
-# 'panelsCount': 1, 'yearlyEnergyDcKwh': 419.31348, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees':
-# 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 17,
-# 'yearlyEnergyDcKwh': 7740.388, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331,
-# 'panelsCount': 12, 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775,
-# 'azimuthDegrees': 301.1099, 'panelsCount': 2, 'yearlyEnergyDcKwh': 838.0579, 'segmentIndex': 2}, {'pitchDegrees':
-# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]},
-# {'panelsCount': 18, 'yearlyEnergyDcKwh': 8154.265, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 13, 'yearlyEnergyDcKwh': 5961.7896, 'segmentIndex': 0},
-# {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 2, 'yearlyEnergyDcKwh': 838.0579,
-# 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh':
-# 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 19, 'yearlyEnergyDcKwh': 8566.032, 'roofSegmentSummaries': [{
-# 'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 14, 'yearlyEnergyDcKwh': 6373.556,
-# 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 2, 'yearlyEnergyDcKwh':
-# 838.0579, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 20, 'yearlyEnergyDcKwh': 8976.624,
-# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 838.0579, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees':
-# 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 21,
-# 'yearlyEnergyDcKwh': 9380.78, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331,
-# 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775,
-# 'azimuthDegrees': 301.1099, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1242.214, 'segmentIndex': 2}, {'pitchDegrees':
-# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]},
-# {'panelsCount': 22, 'yearlyEnergyDcKwh': 9784.078, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 4, 'yearlyEnergyDcKwh': 1645.5122,
-# 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh':
-# 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 23, 'yearlyEnergyDcKwh': 10162.354, 'roofSegmentSummaries': [{
-# 'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484,
-# 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 4, 'yearlyEnergyDcKwh':
-# 1645.5122, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 1, 'yearlyEnergyDcKwh': 378.2754, 'segmentIndex': 4}]}, {'panelsCount': 24, 'yearlyEnergyDcKwh':
-# 10535.894, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099,
-# 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees':
-# 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294,
-# 'azimuthDegrees': 308.42334, 'panelsCount': 1, 'yearlyEnergyDcKwh': 378.2754, 'segmentIndex': 4}]}, {'panelsCount':
-# 25, 'yearlyEnergyDcKwh': 10901.273, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees':
-# 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775,
-# 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees':
-# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3},
-# {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497,
-# 'segmentIndex': 4}]}, {'panelsCount': 26, 'yearlyEnergyDcKwh': 11242.756, 'roofSegmentSummaries': [{'pitchDegrees':
-# 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 1, 'yearlyEnergyDcKwh': 341.4827,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 27, 'yearlyEnergyDcKwh':
-# 11579.401, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 678.1277, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 28, 'yearlyEnergyDcKwh': 11919.106, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1017.83356,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 29, 'yearlyEnergyDcKwh':
-# 12255.358, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 4, 'yearlyEnergyDcKwh': 1354.0854, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 30, 'yearlyEnergyDcKwh': 12586.448, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 5, 'yearlyEnergyDcKwh': 1685.1748,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 31, 'yearlyEnergyDcKwh':
-# 12911.502, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 6, 'yearlyEnergyDcKwh': 2010.2289, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 32, 'yearlyEnergyDcKwh': 13233.139, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 7, 'yearlyEnergyDcKwh': 2331.8652,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 33, 'yearlyEnergyDcKwh':
-# 13554.602, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 8, 'yearlyEnergyDcKwh': 2653.3286, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 34, 'yearlyEnergyDcKwh': 13893.903, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 9, 'yearlyEnergyDcKwh': 2992.6301,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 35, 'yearlyEnergyDcKwh':
-# 14221.166, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 10, 'yearlyEnergyDcKwh': 3319.893, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
-# {'panelsCount': 36, 'yearlyEnergyDcKwh': 14536.154, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
-# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 11, 'yearlyEnergyDcKwh': 3634.8809,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 37, 'yearlyEnergyDcKwh':
-# 14850.317, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 12, 'yearlyEnergyDcKwh': 3949.0444, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775,
-# 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees':
-# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3},
-# {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497,
-# 'segmentIndex': 4}]}, {'panelsCount': 38, 'yearlyEnergyDcKwh': 15160.658, 'roofSegmentSummaries': [{'pitchDegrees':
-# 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
-# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 13, 'yearlyEnergyDcKwh': 4259.385,
-# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
-# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
-# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
-# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 39, 'yearlyEnergyDcKwh':
-# 15438.986, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
-# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
-# 'panelsCount': 14, 'yearlyEnergyDcKwh': 4537.713, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
-# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
-# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
-# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}],
-# 'panelCapacityWatts': 400, 'panelHeightMeters': 1.879, 'panelWidthMeters': 1.045, 'panelLifetimeYears': 20,
-# 'buildingStats': {'areaMeters2': 138.38115, 'sunshineQuantiles': [537, 728.5604, 799.23975, 833.99713, 900.88086,
-# 959.65875, 1024.2743, 1086.1285, 1132.8774, 1162.1904, 1193.6012], 'groundAreaMeters2': 117.16}, 'solarPanels': [{
-# 'center': {'latitude': 50.667371499999994, 'longitude': -4.7417235}, 'orientation': 'LANDSCAPE',
-# 'yearlyEnergyDcKwh': 468.5037, 'segmentIndex': 0}, {'center': {'latitude': 50.6673614, 'longitude': -4.7417023},
-# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 467.61072, 'segmentIndex': 0}, {'center': {'latitude':
-# 50.667365100000005, 'longitude': -4.7417311}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 465.55005,
-# 'segmentIndex': 0}, {'center': {'latitude': 50.6673512, 'longitude': -4.741681000000001}, 'orientation':
-# 'LANDSCAPE', 'yearlyEnergyDcKwh': 465.48712, 'segmentIndex': 0}, {'center': {'latitude': 50.667357599999995,
-# 'longitude': -4.7416734}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 467.8553, 'segmentIndex': 0},
-# {'center': {'latitude': 50.6673779, 'longitude': -4.741715999999999}, 'orientation': 'LANDSCAPE',
-# 'yearlyEnergyDcKwh': 464.84396, 'segmentIndex': 0}, {'center': {'latitude': 50.6673678, 'longitude': -4.7416947},
-# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 464.79984, 'segmentIndex': 0}, {'center': {'latitude': 50.6673549,
-# 'longitude': -4.7417098}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 461.58975, 'segmentIndex': 0},
-# {'center': {'latitude': 50.6673816, 'longitude': -4.7417448}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh':
-# 461.48065, 'segmentIndex': 0}, {'center': {'latitude': 50.6673881, 'longitude': -4.7417372}, 'orientation':
-# 'LANDSCAPE', 'yearlyEnergyDcKwh': 458.3733, 'segmentIndex': 0}, {'center': {'latitude': 50.6673149, 'longitude':
-# -4.7416768}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 457.68268, 'segmentIndex': 3}, {'center': {
-# 'latitude': 50.6673204, 'longitude': -4.7416867}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 456.06827,
-# 'segmentIndex': 3}, {'center': {'latitude': 50.667375199999995, 'longitude': -4.7417524}, 'orientation':
-# 'LANDSCAPE', 'yearlyEnergyDcKwh': 453.20776, 'segmentIndex': 0}, {'center': {'latitude': 50.667364, 'longitude':
-# -4.7416659}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 448.61087, 'segmentIndex': 0}, {'center': {
-# 'latitude': 50.6673094, 'longitude': -4.741666899999999}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh':
-# 440.66626, 'segmentIndex': 3}, {'center': {'latitude': 50.667403799999995, 'longitude': -4.741588900000001},
-# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 419.31348, 'segmentIndex': 2}, {'center': {'latitude':
-# 50.66740850000001, 'longitude': -4.7416016999999995}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 418.74448,
-# 'segmentIndex': 2}, {'center': {'latitude': 50.6673688, 'longitude': -4.7417599}, 'orientation': 'LANDSCAPE',
-# 'yearlyEnergyDcKwh': 413.877, 'segmentIndex': 0}, {'center': {'latitude': 50.667348499999996, 'longitude':
-# -4.7417174}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 411.76657, 'segmentIndex': 0}, {'center': {
-# 'latitude': 50.6673587, 'longitude': -4.7417387}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 410.5925,
-# 'segmentIndex': 0}, {'center': {'latitude': 50.6673992, 'longitude': -4.7415761}, 'orientation': 'LANDSCAPE',
-# 'yearlyEnergyDcKwh': 404.15607, 'segmentIndex': 2}, {'center': {'latitude': 50.6674132, 'longitude': -4.7416145},
-# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 403.29822, 'segmentIndex': 2}, {'center': {'latitude': 50.6673324,
-# 'longitude': -4.7417015}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 378.2754, 'segmentIndex': 4}, {'center':
-# {'latitude': 50.667417799999996, 'longitude': -4.7416273}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh':
-# 373.53967, 'segmentIndex': 2}, {'center': {'latitude': 50.667324900000004, 'longitude': -4.7417104}, 'orientation':
-# 'PORTRAIT', 'yearlyEnergyDcKwh': 365.37958, 'segmentIndex': 4}, {'center': {'latitude': 50.6674043, 'longitude':
-# -4.741680800000001}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 341.4827, 'segmentIndex': 1}, {'center': {
-# 'latitude': 50.667392299999996, 'longitude': -4.7416919}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh':
-# 336.64502, 'segmentIndex': 1}, {'center': {'latitude': 50.667397, 'longitude': -4.741704599999999}, 'orientation':
-# 'PORTRAIT', 'yearlyEnergyDcKwh': 339.7059, 'segmentIndex': 1}, {'center': {'latitude': 50.6674018, 'longitude':
-# -4.7417174}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 336.25195, 'segmentIndex': 1}, {'center': {'latitude':
-# 50.6673875, 'longitude': -4.7416791}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 331.08936, 'segmentIndex':
-# 1}, {'center': {'latitude': 50.6674065, 'longitude': -4.7417301}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh':
-# 325.05405, 'segmentIndex': 1}, {'center': {'latitude': 50.6673828, 'longitude': -4.7416664}, 'orientation':
-# 'PORTRAIT', 'yearlyEnergyDcKwh': 321.63647, 'segmentIndex': 1}, {'center': {'latitude': 50.667378, 'longitude':
-# -4.741653599999999}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 321.46332, 'segmentIndex': 1}, {'center': {
-# 'latitude': 50.667373299999994, 'longitude': -4.7416409}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 339.3016,
-# 'segmentIndex': 1}, {'center': {'latitude': 50.6673853, 'longitude': -4.7416298}, 'orientation': 'PORTRAIT',
-# 'yearlyEnergyDcKwh': 327.26282, 'segmentIndex': 1}, {'center': {'latitude': 50.667399499999995, 'longitude':
-# -4.741668}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 314.9878, 'segmentIndex': 1}, {'center': {'latitude':
-# 50.6673948, 'longitude': -4.7416553}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 314.16364, 'segmentIndex':
-# 1}, {'center': {'latitude': 50.667390000000005, 'longitude': -4.7416425}, 'orientation': 'PORTRAIT',
-# 'yearlyEnergyDcKwh': 310.3404, 'segmentIndex': 1}, {'center': {'latitude': 50.6674186, 'longitude': -4.7417191},
-# 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 278.3281, 'segmentIndex': 1}]}, 'boundingBox': {'sw': {'latitude':
-# 50.6672904, 'longitude': -4.741778}, 'ne': {'latitude': 50.667431199999996, 'longitude': -4.7415536}},
-# 'imageryQuality': 'MEDIUM', 'imageryProcessedDate': {'year': 2024, 'month': 4, 'day': 18}}
+        self.insights_data = self.get_building_insights(longitude, latitude, required_quality)
+
+        # Extract key data from the insights response
+        self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', [])
+        self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2']
+        self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
+        self.panel_area = (
+            self.insights_data["solarPotential"]["panelHeightMeters"] *
+            self.insights_data["solarPotential"]["panelWidthMeters"]
+        )
+        self.panel_wattage = self.insights_data["solarPotential"]["panelCapacityWatts"]
+        if self.panel_wattage != 400:
+            # In the API documentation, it claims that the default output is 250W, however we've only seen 400W, so if
+            # we get anything other than 400W, we'll need to adjust the calculations in the output. For this, we should
+            # refer to https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
+            # Where the documentation explains how to adjust the yearlyEnergyDcKwh figures.
+            # It should be straightforward, but I'd rather see an actual instance of this happening
+            raise NotImplementedError("Panel wattage is not 400W - implement me")
+
+        # Automatically exclude north-facing segments
+        self.exclude_north_facing_segments()
+
+        self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments]
+
+        # We now start finding the solar panel configurations
+        self.optimise_solar_configuration()
+
+    @staticmethod
+    def lifetime_production_ac_kwh(
+        row,
+        efficiency_depreciation_factor,
+        installation_life_span
+    ):
+        """
+        Mimics the function described in the Google Solar API documentation, presenting the lifetime production
+        AC KWH as a geometric sum.
+
+        Year ``k`` produces ``initial * factor ** k``, so summing years ``0 .. life_span - 1`` gives
+        ``initial * (1 - factor ** life_span) / (1 - factor)``.
+
+        :param row: mapping (for example a dataframe row) holding "initial_ac_kwh_per_year"
+        :param efficiency_depreciation_factor: ratio between one year's output and the previous year's
+        :param installation_life_span: number of years to sum over
+        :return: total AC kWh produced over the life span
+        """
+
+        initial_ac_kwh = row["initial_ac_kwh_per_year"]
+
+        # With no depreciation the closed form below divides zero by zero; the series is then simply
+        # installation_life_span equal terms.
+        if efficiency_depreciation_factor == 1:
+            return initial_ac_kwh * installation_life_span
+
+        return (
+            initial_ac_kwh *
+            (1 - pow(
+                efficiency_depreciation_factor,
+                installation_life_span)) /
+            (1 - efficiency_depreciation_factor))
+
+    @staticmethod
+    def annualUtilityBillEstimate(
+        yearlyKWhEnergyConsumption,
+        initialAcKwhPerYear,
+        efficiencyDepreciationFactor,
+        year,
+        costIncreaseFactor,
+        discountRate):
+        """
+        Implements the bill costing model for estimating the utility bill of a single year.
+
+        The value returned by ``annualProduction`` for ``year`` is subtracted from the yearly consumption, the
+        remainder is passed to ``billCostModel``, and the result is multiplied by ``costIncreaseFactor ** year``
+        and divided by ``discountRate ** year``.
+
+        NOTE(review): ``billCostModel`` and ``annualProduction`` are not defined in the part of the file visible
+        here - confirm they exist at module level, otherwise this raises NameError when called.
+
+        :param yearlyKWhEnergyConsumption: yearly energy consumption that the production is subtracted from
+        :param initialAcKwhPerYear: forwarded to ``annualProduction`` as its first argument
+        :param efficiencyDepreciationFactor: forwarded to ``annualProduction`` as its second argument
+        :param year: year being estimated; also the exponent applied to both factors
+        :param costIncreaseFactor: base of the multiplier applied to the bill
+        :param discountRate: base of the divisor applied to the bill
+        :return: the estimated bill for ``year``
+        """
+
+        return (
+            billCostModel(
+                yearlyKWhEnergyConsumption -
+                annualProduction(
+                    initialAcKwhPerYear,
+                    efficiencyDepreciationFactor,
+                    year)) *
+            pow(costIncreaseFactor, year) /
+            pow(discountRate, year))
+
+    @classmethod
+    def lifetimeUtilityBill(
+        cls,
+        yearlyKWhEnergyConsumption,
+        initialAcKwhPerYear,
+        efficiencyDepreciationFactor,
+        installationLifeSpan,
+        costIncreaseFactor,
+        discountRate):
+        """
+        Estimate the utility bill for every year of the installation's life.
+
+        Declared as a classmethod so that it can be called on the class or on an instance, and so that the
+        sibling ``annualUtilityBillEstimate`` is reachable through ``cls`` (a bare name is not resolved against
+        the class body from inside a method).
+
+        :param yearlyKWhEnergyConsumption: forwarded unchanged to ``annualUtilityBillEstimate``
+        :param initialAcKwhPerYear: forwarded unchanged to ``annualUtilityBillEstimate``
+        :param efficiencyDepreciationFactor: forwarded unchanged to ``annualUtilityBillEstimate``
+        :param installationLifeSpan: number of years to estimate
+        :param costIncreaseFactor: forwarded unchanged to ``annualUtilityBillEstimate``
+        :param discountRate: forwarded unchanged to ``annualUtilityBillEstimate``
+        :return: list with one bill estimate per year, for years ``0 .. installationLifeSpan - 1``
+        """
+        return [
+            cls.annualUtilityBillEstimate(
+                yearlyKWhEnergyConsumption,
+                initialAcKwhPerYear,
+                efficiencyDepreciationFactor,
+                year,
+                costIncreaseFactor,
+                discountRate)
+            for year in range(installationLifeSpan)
+        ]
+
+    def estimate_solar_costs(self, panel_performance):
+        """
+        This method implements the recommended costing approach, to estimate the ROI of a solar panel
+        configuration, as described in the Google Solar API documentation.
+
+        Only the first two steps are implemented so far: converting DC production into AC production and
+        computing the lifetime AC production of each configuration.
+
+        :param panel_performance: dataframe containing the solar panel array configuration and energy generation
+            data. It must hold "yearly_dc_energy" and "array_warrage" columns; an "initial_ac_kwh_per_year" column
+            is added to it in place.
+        :return: a filtered copy of panel_performance with an additional "lifetime_ac_kwh" column
+        """
+
+        # We now estimate the financial benefits of solar panels for the household, using the framework described
+        # by the Google Solar API
+        # 1) Convert the DC energy production into AC energy production
+        panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate
+
+        # TODO: the bill model needs a benchmark electricity consumption figure. A figure based on the national
+        # average does not respect the fact that a property could be 100% electric
+
+        # Remove anything where the total AC energy is less than half of the array wattage
+        has_sufficient_yield = (
+            panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]
+        ) >= 0.5
+        # Copy, so that the column added below lands on an independent frame rather than on a slice of the input
+        panel_performance = panel_performance[has_sufficient_yield].copy()
+
+        # 2) Calculate the lifetime solar energy production. The formula is plain arithmetic on the
+        # "initial_ac_kwh_per_year" entry, so the whole frame is passed in place of a single row; this also keeps
+        # the assignment valid when the filter above leaves no rows
+        panel_performance['lifetime_ac_kwh'] = self.lifetime_production_ac_kwh(
+            panel_performance,
+            efficiency_depreciation_factor=self.efficiency_depreciation_factor,
+            installation_life_span=self.installation_life_span
+        )
+
+        # TODO: Complete the rest of the solar model
+
+        return panel_performance
+
+    def optimise_solar_configuration(self):
+        """
+        Optimise the solar panel configuration for the building.
+
+        Every configuration in the insights response is restricted to the roof segments listed in
+        ``self.roof_segment_indexes`` and summarised (panel count, yearly DC energy, cost, covered roof area).
+        Duplicate configurations and configurations with fewer than 4 panels are dropped, then the yearly value of
+        the generated energy and a simple payback period are estimated.
+
+        :return: None - the resulting dataframe is stored on ``self.panel_performance``
+        """
+
+        solar_potential = self.insights_data["solarPotential"]
+        panel_capacity_watts = solar_potential["panelCapacityWatts"]
+
+        panel_performance = []
+        for config in solar_potential["solarPanelConfigs"]:
+            # Filter on just the segments in self.roof_segment_indexes. Segments without panels contribute
+            # nothing to the totals and would make the energy/wattage ratio divide by zero
+            roof_segment_summaries = [
+                segment for segment in config["roofSegmentSummaries"]
+                if segment["segmentIndex"] in self.roof_segment_indexes and segment["panelsCount"] > 0
+            ]
+            if not roof_segment_summaries:
+                # Every panel of this configuration sits on an excluded segment
+                continue
+
+            roi_summary = []
+            for segment in roof_segment_summaries:
+                wattage = segment["panelsCount"] * panel_capacity_watts
+                generated_dc_energy = segment["yearlyEnergyDcKwh"]
+                ratio = generated_dc_energy / wattage
+                # NOTE(review): this scales the cost by yearly energy / 1000 rather than by installed
+                # wattage / 1000 - confirm the units of "average_cost_per_kwh"
+                cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (generated_dc_energy / 1000)
+                roi_summary.append(
+                    {
+                        "segmentIndex": segment["segmentIndex"],
+                        "wattage": wattage,
+                        "generated_dc_energy": generated_dc_energy,
+                        "ratio": ratio,
+                        "n_panels": segment["panelsCount"],
+                        "cost": cost,
+                        "panneled_roof_area": self.panel_area * int(segment["panelsCount"])
+                    }
+                )
+
+            roi_summary = pd.DataFrame(roi_summary)
+
+            # Energy-weighted average of each segment's energy-per-watt ratio
+            weighted_ratio = np.average(
+                roi_summary["ratio"].values, weights=roi_summary["generated_dc_energy"].values
+            )
+            n_panels = roi_summary["n_panels"].sum()
+
+            panel_performance.append(
+                {
+                    "n_panels": n_panels,
+                    "yearly_dc_energy": roi_summary["generated_dc_energy"].sum(),
+                    "total_cost": roi_summary["cost"].sum(),
+                    "weighted_ratio": weighted_ratio,
+                    "panneled_roof_area": roi_summary["panneled_roof_area"].sum(),
+                    "array_warrage": n_panels * self.panel_wattage
+                }
+            )
+
+        # Naming the columns keeps them available even when no configuration survived the filtering above
+        panel_performance = pd.DataFrame(
+            panel_performance,
+            columns=[
+                "n_panels", "yearly_dc_energy", "total_cost", "weighted_ratio", "panneled_roof_area", "array_warrage"
+            ]
+        )
+        # We can have duplicate configurations
+        panel_performance = panel_performance.drop_duplicates()
+        # Ensure at least 4 panels. Copy so that the columns added below do not land on a slice
+        panel_performance = panel_performance[panel_performance["n_panels"] >= 4].copy()
+
+        # The costing model is still work in progress; it needs the configurations to work on
+        self.estimate_solar_costs(panel_performance)
+
+        # This first bracket is the value of the energy bill savings
+        panel_performance["bill_savings"] = (
+            self.SOLAR_CONSUMPTION_PROPORTION *
+            panel_performance["yearly_dc_energy"] *
+            AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        )
+        # This is the value of the energy exported
+        panel_performance["export_value"] = (
+            (1 - self.SOLAR_CONSUMPTION_PROPORTION) *
+            panel_performance["yearly_dc_energy"] *
+            AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
+        )
+        panel_performance["energy_value"] = panel_performance["bill_savings"] + panel_performance["export_value"]
+        panel_performance["payback_years"] = panel_performance["total_cost"] / panel_performance["energy_value"]
+
+        panel_performance = panel_performance.sort_values("weighted_ratio", ascending=False)
+        # TODO: Finish this!!
+
+        panel_performance["roof_area_percentage"] = panel_performance["panneled_roof_area"] / self.roof_area
+
+        self.panel_performance = panel_performance
+
+    def exclude_north_facing_segments(self):
+        """
+        Filter out any north-facing roof segments from the roof_segments attribute.
+
+        North-facing segments are defined as those with an azimuth between -30 and 30 degrees. The insights
+        response reports azimuths as compass bearings (for example 301.1 or 308.4), so each bearing is also
+        checked in its signed form - 340 degrees becomes -20 - otherwise segments facing just west of north
+        would never fall inside the range.
+
+        Every segment is stamped with its position in the original list as "segmentIndex" before filtering.
+        """
+
+        lower_bound = self.NORTH_FACING_AZIMUTH_RANGE[0]
+        upper_bound = self.NORTH_FACING_AZIMUTH_RANGE[1]
+
+        filtered_segments = []
+        for segment_index, segment in enumerate(self.roof_segments):
+            segment["segmentIndex"] = segment_index
+
+            azimuth = segment['azimuthDegrees']
+            # Map the bearing onto [-180, 180) so that both sides of north are contiguous around zero
+            signed_azimuth = (azimuth + 180) % 360 - 180
+
+            # Check if the segment is north-facing, in either representation of the bearing
+            if lower_bound <= azimuth <= upper_bound or lower_bound <= signed_azimuth <= upper_bound:
+                continue
+
+            filtered_segments.append(segment)
+
+        self.roof_segments = filtered_segments
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -14,6 +14,7 @@ class Settings(BaseSettings):
    PLAN_TRIGGER_BUCKET: str
    EPC_AUTH_TOKEN: str
    ORDNANCE_SURVEY_API_KEY: str
+    GOOGLE_SOLAR_API_KEY: str
    DB_HOST: str
    DB_PASSWORD: str
    DB_USERNAME: str
--- a/backend/app/db/models/materials.py
+++ b/backend/app/db/models/materials.py
@ -88,3 +88,4 @@ class Material(Base):
    plant_cost = Column(Float)
    total_cost = Column(Float)
    notes = Column(String)
+    is_installer_quote = Column(Boolean, nullable=False, default=False)
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -23,12 +23,13 @@ from backend.app.db.functions.recommendations_functions import (
 )
 from backend.app.db.models.portfolio import rating_lookup
 from backend.app.dependencies import validate_token
-from backend.app.plan.schemas import PlanTriggerRequest
+from backend.app.plan.schemas import PlanTriggerRequest, MdsRequest
 from backend.app.plan.utils import get_cleaned
 from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc

 from backend.ml_models.api import ModelApi
 from backend.Property import Property
+from backend.apis.GoogleSolarApi import GoogleSolarApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply

 from recommendations.optimiser.CostOptimiser import CostOptimiser
@ -347,10 +348,15 @@ async def trigger_plan(body: PlanTriggerRequest):
            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
        )
        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
+        solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)

        logger.info("Getting spatial data")
        for p in input_properties:
+            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
            p.get_spatial_data(uprn_filenames)
+            # Call Google Solar API
+            # TODO: Complete me
+            solar_performance = solar_api_client.get(longitude=p.spatial["longitude"], latitude=p.spatial["latitude"])

        logger.info("Getting components and epc recommendations")
        recommendations = {}
@ -358,9 +364,6 @@ async def trigger_plan(body: PlanTriggerRequest):
        representative_recommendations = {}
        for p in tqdm(input_properties):

-            # Property recommendations
-            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
-
            recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
            property_recommendations, property_representative_recommendations = recommender.recommend()

@ -422,9 +425,7 @@ async def trigger_plan(body: PlanTriggerRequest):

            (
                recommendations_with_impact,
-                current_adjusted_energy,
                expected_adjusted_energy,
-                current_energy_bill,
                expected_energy_bill
            ) = (
                Recommendations.calculate_recommendation_impact(
@ -436,9 +437,7 @@ async def trigger_plan(body: PlanTriggerRequest):

            # Store the resulting adjusted energy in the property instance
            property_instance.set_adjusted_energy(
-                current_adjusted_energy=current_adjusted_energy,
                expected_adjusted_energy=expected_adjusted_energy,
-                current_energy_bill=current_energy_bill,
                expected_energy_bill=expected_energy_bill
            )

@ -622,7 +621,7 @@ async def trigger_plan(body: PlanTriggerRequest):


@router.post("/mds")
-async def build_mds(body: PlanTriggerRequest):
+async def build_mds(body: MdsRequest):
    # TODO: This is a placeholder location for the MDS endpoint, which this is being assembled

    logger.info("Connecting to db")
@ -633,6 +632,8 @@ async def build_mds(body: PlanTriggerRequest):
        session.begin()
        logger.info("Getting the inputs")
        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
+        measure_set = body.measures
+        optimise_measures = measure_set is not None

        cleaning_data = read_dataframe_from_s3_parquet(
            bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
@ -659,10 +660,14 @@ async def build_mds(body: PlanTriggerRequest):
            epc_searcher.find_property(skip_os=True)

            if config["address"] == "35b High Street":
-                print("Performing temporary patch")
+                print("Performing temporary patch on 35b High Street")
                epc_searcher.newest_epc["uprn"] = 10002911892
                epc_searcher.full_sap_epc["uprn"] = 10002911892

+            if config["address"] == "Cobnut Barn":
+                print("Performing temporary patch on Cobnut Barn")
+                epc_searcher.newest_epc["uprn"] = 10013924689
+
            # Create a record in db
            # TODO: If we productionise the creation of this mds report, we will need to store this in the db
            # property_id, is_new = create_property(
@ -706,7 +711,10 @@ async def build_mds(body: PlanTriggerRequest):
            #     (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
            # ), {})

-            measures = config["measures"] if "measures" in config else None
+            if measure_set is None:
+                measures = config["measures"] if "measures" in config else None
+            else:
+                measures = measure_set

            input_properties.append(
                Property(
@ -737,24 +745,49 @@ async def build_mds(body: PlanTriggerRequest):
        logger.info("Getting components and epc recommendations")
        recommendations_scoring_data = []
        representative_recommendations = {}
+        recommendations = {}

        for p in tqdm(input_properties):
-
            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

-            mds = Mds(property_instance=p, materials=materials)
-            property_representative_recommendations, errors = mds.build()
+            mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
+            mds_recommendations, property_representative_recommendations, errors = mds.build()

-            if errors:
-                logger.info("Errors occurred during MDS build")
+            if isinstance(errors, list):
+                if errors:
+                    raise Exception("Errors occurred during MDS build")
+            else:
+                if any([len(x) for x in errors.values()]):
+                    raise Exception("Errors occurred during MDS build")

+            recommendations[p.id] = mds_recommendations
            representative_recommendations[p.id] = property_representative_recommendations

            # Build the scoring data
            p.create_base_difference_epc_record(cleaned_lookup=cleaned)
-            recommendations_scoring_data.append(
-                p.simulate_all_representative_recommendations(property_representative_recommendations)
-            )
+            if optimise_measures:
+                for _id, mds_recs in mds_recommendations.items():
+                    representative_ids = [r["recommendation_id"] for r in property_representative_recommendations[_id]]
+                    simulation_mds_recs = []
+                    for recs in mds_recs:
+                        simulation_mds_recs.append(
+                            [r for r in recs if r["recommendation_id"] in representative_ids]
+                        )
+
+                    p.adjust_difference_record_with_recommendations(
+                        simulation_mds_recs, property_representative_recommendations[_id]
+                    )
+
+                    data = p.recommendations_scoring_data.copy()
+                    for d in data:
+                        d["id"] = d["id"] + "*" + _id
+
+                    recommendations_scoring_data.extend(data)
+
+            else:
+                recommendations_scoring_data.append(
+                    p.simulate_all_representative_recommendations(property_representative_recommendations)
+                )

        logger.info("Preparing data for scoring in sap change api")
        recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
@ -787,13 +820,198 @@ async def build_mds(body: PlanTriggerRequest):
            for key, scored in predictions_dict.items():
                all_predictions[key] = pd.concat([all_predictions[key], scored])

-        # We now produce a table of results for the mds report
+        # TODO: 1) walls_insulation_thickness_ending is not being set in the recommendations_scoring_data,
+        #           insulation_thickness_ending is being set instead
+        #       2)

        # TODO: TEMP
        for p in plan_input:
            if p["uprn"]:
                p["uprn"] = str(int(float(p["uprn"])))

+        import re
+        from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+
+        if optimise_measures:
+            results = []
+            for p in input_properties:
+
+                sap_before = int(p.data["current-energy-efficiency"])
+                epc_before = p.data["current-energy-rating"]
+                heat_demand_before = p.data["energy-consumption-current"]
+                carbon_before = p.data["co2-emissions-current"]
+                current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+                    epc_energy_consumption=heat_demand_before * p.floor_area,
+                    current_epc_rating=epc_before,
+                )
+                current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
+
+                package_comparison = []
+                for _id in recommendations[p.id].keys():
+
+                    sap_prediction = all_predictions["sap_change_predictions"][
+                        (all_predictions["sap_change_predictions"]["property_id"] == str(p.id)) &
+                        (all_predictions["sap_change_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
+                        ].copy().reset_index(drop=True)
+                    sap_prediction["row_id"] = sap_prediction.index
+
+                    heat_demand_prediction = all_predictions["heat_demand_predictions"][
+                        (all_predictions["heat_demand_predictions"]["property_id"] == str(p.id)) &
+                        (all_predictions["heat_demand_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
+                        ].copy().reset_index(drop=True)
+                    heat_demand_prediction["row_id"] = heat_demand_prediction.index
+
+                    carbon_prediction = all_predictions["carbon_change_predictions"][
+                        (all_predictions["carbon_change_predictions"]["property_id"] == str(p.id)) &
+                        (all_predictions["carbon_change_predictions"]["recommendation_id"].str.contains(re.escape(_id)))
+                        ].copy().reset_index(drop=True)
+                    carbon_prediction["row_id"] = carbon_prediction.index
+
+                    epc_target = body.goal_value
+                    if epc_before == epc_target:
+                        continue
+
+                    sap_target = epc_to_sap_lower_bound(epc_target)
+                    # Define the measures
+                    sap_threshold_barrier = sap_prediction[sap_prediction["predictions"] >= sap_target]
+                    meets_threshold = True
+                    if sap_threshold_barrier.empty:
+                        sap_threshold_barrier = sap_prediction.tail(1)
+                        meets_threshold = False
+                    sap_threshold_barrier = sap_threshold_barrier.head(1)
+
+                    sap_prediction = sap_prediction[
+                        sap_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
+                        ]
+                    heat_demand_prediction = heat_demand_prediction[
+                        heat_demand_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
+                        ]
+                    carbon_prediction = carbon_prediction[
+                        carbon_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0]
+                        ]
+
+                    reverse_map = {v: k for k, v in Mds.format_map.items()}
+
+                    selected_measures = [
+                        reverse_map[x.split("-")[0]] for x in sap_prediction["recommendation_id"].values
+                    ]
+                    selected_measure_ids = [x.split("*")[0] for x in sap_prediction["recommendation_id"].values]
+
+                    costs = [
+                        r["total"] for r in representative_recommendations[p.id][_id] if
+                        r["recommendation_id"] in selected_measure_ids
+                    ]
+                    costs = sum(costs)
+
+                    sap_after = sap_prediction["predictions"].values[-1]
+                    epc_after = sap_to_epc(sap_after)
+                    heat_demand_after = heat_demand_prediction["predictions"].values[-1]
+                    carbon_after = carbon_prediction["predictions"].values[-1]
+
+                    expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+                        epc_energy_consumption=heat_demand_after * p.floor_area,
+                        current_epc_rating=epc_before,
+                    )
+
+                    expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
+
+                    bill_savings = current_energy_bill - expected_energy_bill
+                    energy_savings = current_adjusted_energy - expected_adjusted_energy
+
+                    package_comparison.append(
+                        {
+                            "id": _id,
+                            "cost": costs,
+                            "measures": selected_measures,
+                            "sap_before": sap_before,
+                            "sap_after": sap_after,
+                            "epc_before": epc_before,
+                            "epc_after": epc_after,
+                            "heat_demand_before": heat_demand_before,
+                            "heat_demand_after": heat_demand_after,
+                            "carbon_before": carbon_before,
+                            "carbon_after": carbon_after,
+                            "bill_savings": bill_savings,
+                            "energy_savings": energy_savings,
+                            "current_energy_bill": current_energy_bill,
+                            "meets_threshold": meets_threshold
+                        }
+                    )
+
+                package_comparison = pd.DataFrame(package_comparison)
+                # Find the smallest cost package
+                if not package_comparison.empty:
+
+                    # We check if any of the packages meet the threshold
+                    # If none of them do, take the one that gets closest to the target
+                    if package_comparison["meets_threshold"].any():
+                        package_comparison = package_comparison[package_comparison["meets_threshold"]]
+                        package_comparison = package_comparison.sort_values("cost")
+                    else:
+                        package_comparison = package_comparison.sort_values("sap_after", ascending=False)
+
+                    package_comparison = package_comparison.head(1).to_dict("records")[0]
+                else:
+                    package_comparison = {
+                        "measures": [],
+                        "sap_before": sap_before,
+                        "sap_after": sap_before,
+                        "epc_before": epc_before,
+                        "epc_after": epc_before,
+                        "heat_demand_before": heat_demand_before,
+                        "heat_demand_after": heat_demand_before,
+                        "carbon_before": carbon_before,
+                        "carbon_after": carbon_before,
+                        "bill_savings": 0,
+                        "energy_savings": 0,
+                        "current_energy_bill": current_energy_bill,
+                        "meets_threshold": False
+                    }
+
+                config = [c for c in plan_input if c["uprn"] == str(p.uprn)]
+                if not config:
+                    config = {"address": None, "postcode": None}
+                else:
+                    config = config[0]
+
+                results.append({
+                    "config_address": config["address"],
+                    "config_postcode": config["postcode"],
+                    "uprn": p.uprn,
+                    "address": p.address,
+                    "postcode": p.postcode,
+                    "measures": package_comparison["measures"],
+                    "year_of_epc": p.data['lodgement-date'],
+                    "sap_before": package_comparison["sap_before"],
+                    "sap_after": package_comparison["sap_after"],
+                    "epc_before": package_comparison["epc_before"],
+                    "epc_after": package_comparison["epc_after"],
+                    "heat_demand_before": package_comparison["heat_demand_before"],
+                    "heat_demand_after": package_comparison["heat_demand_after"],
+                    "carbon_before": package_comparison["carbon_before"],
+                    "carbon_after": package_comparison["carbon_after"],
+                    "bill_savings": round(package_comparison["bill_savings"], 2),
+                    "energy_savings": round(package_comparison["energy_savings"], 2),
+                    "current_energy_bill": round(package_comparison["current_energy_bill"], 2),
+                    "EWI": "EWI" if "external_wall_insulation" in package_comparison["measures"] else None,
+                    "CWI": "CWI" if "cavity_wall_insulation" in package_comparison["measures"] else None,
+                    "LI": "LI" if "loft_insulation" in package_comparison["measures"] else None,
+                    "ASHP Htg": "ASHP Htg" if "air_source_heat_pump" in package_comparison["measures"] else None,
+                    "Elec Storage": (
+                        "Elec Storage Htrs (Out of scope -Prov sum only)" if "high_heat_retention_storage_heaters" in
+                                                                             package_comparison["measures"] else None
+                    ),
+                    "Solar PV": "Solar PV" if "solar_pv" in package_comparison["measures"] else None,
+                })
+
+            results = pd.DataFrame(results)
+
+            # For the different measures, we check the impact with a few debugging functions
+
+            walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures)
+
+            results.to_excel("optimised mds_results 5th June.xlsx")
+
        results = []
        for p in input_properties:
            measures = p.measures
@ -842,11 +1060,14 @@ async def build_mds(body: PlanTriggerRequest):
            )

            # TODO: We should determine if the home is gas & electricity or just electricity
+
+            # Determine if the heating and hotwater was previously electric only or both
+
            current_energy_bill = AnnualBillSavings.calculate_annual_bill(
-                current_adjusted_energy,
+                kwh=current_adjusted_energy,
            )
            expected_energy_bill = AnnualBillSavings.calculate_annual_bill(
-                expected_adjusted_energy,
+                kwh=expected_adjusted_energy,
            )

            bill_savings = current_energy_bill - expected_energy_bill
@ -861,6 +1082,7 @@ async def build_mds(body: PlanTriggerRequest):
            to_append = {
                "config_address": config["address"],
                "config_postcode": config["postcode"],
+                "uprn": p.uprn,
                "address": p.address,
                "postcode": p.postcode,
                "measures": measures,
@ -874,14 +1096,19 @@ async def build_mds(body: PlanTriggerRequest):
                "heat_demand_after": heat_demand_after,
                "carbon_before": carbon_before,
                "carbon_after": carbon_after,
-                "bill_savings": bill_savings,
-                "energy_savings": energy_savings,
+                "bill_savings": round(bill_savings, 2),
+                "energy_savings": round(energy_savings, 2),
+                "current_energy_bill": round(current_energy_bill, 2),
+                "fuel_type": p.main_fuel["fuel_type"],
            }
            results.append(to_append)

        results = pd.DataFrame(results)
        results["sap_uplift"] = results["sap_after"] - results["sap_before"]

+        # results.to_excel("mds_results 5th June.xlsx")
+
+        walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures)

    except IntegrityError:
        logger.error("Database integrity error occurred", exc_info=True)
@ -901,3 +1128,80 @@ async def build_mds(body: PlanTriggerRequest):
        return Response(status_code=500, content="An unexpected error occurred.")
    finally:
        session.close()
+
+
+def check_mds(results, input_properties, recommendations, optimise_measures):
+    """
+    Debugging helper: build one table of wall measures and one of HHR storage heater measures, with a row
+    per property, so the selected measures can be compared against the property's existing fabric/heating.
+
+    :param results: DataFrame of MDS results, with a "uprn" column and a "measures" column
+    :param input_properties: the property instances that the results were produced for
+    :param recommendations: dict keyed on property id; the keys of each inner dict are parsed with
+        ast.literal_eval into lists of measure names. Only read when optimise_measures is True
+    :param optimise_measures: when True, "measures" is read as a flat list of measure names; when False it is
+        read as a list of dicts and the first key of each dict is taken as the measure name
+    :return: tuple of (walls_check, hhr_check) DataFrames
+    :raises IndexError: if a property has no row in results
+    :raises Exception: if more than one wall insulation measure was selected for a property
+    """
+    import ast
+
+    wall_measures = ("internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation")
+    hhr_measures = ("high_heat_retention_storage_heaters",)
+
+    walls_check = []
+    hhr_check = []
+    for p in input_properties:
+        res = results[results["uprn"] == p.uprn]
+        if res.empty:
+            # Fail with the offending uprn rather than an anonymous index error from .values[0]
+            raise IndexError(f"No MDS results row found for uprn {p.uprn}")
+
+        # Only the first matching row is inspected
+        selected_measures = res["measures"].values[0]
+        if optimise_measures:
+            measures = selected_measures
+        else:
+            measures = [list(z.keys())[0] for z in selected_measures]
+
+        wall_recommendation = [x for x in measures if x in wall_measures]
+        hhr_recommendation = [x for x in measures if x in hhr_measures]
+
+        if optimise_measures:
+            # Flatten every candidate package into a de-duplicated list of measure names
+            possible_measures = list({
+                measure for key in recommendations[p.id] for measure in ast.literal_eval(key)
+            })
+        else:
+            possible_measures = p.measures
+
+        if len(wall_recommendation) > 1:
+            raise Exception(
+                f"Multiple wall insulation measures selected for uprn {p.uprn}: {wall_recommendation}"
+            )
+        wall_recommendation = wall_recommendation[0] if wall_recommendation else None
+        hhr_recommendation = hhr_recommendation[0] if hhr_recommendation else None
+
+        walls_check.append(
+            {
+                "uprn": p.uprn,
+                "address": p.address,
+                "postcode": p.postcode,
+                "property_type": p.data['property-type'],
+                "conservation_status": p.spatial["conservation_status"],
+                "is_listed_building": p.spatial["is_listed_building"],
+                "is_heritage_building": p.spatial["is_heritage_building"],
+                "wall": p.walls["clean_description"],
+                "recommendation": wall_recommendation,
+                "possible_measures": possible_measures,
+                "selected_measures": selected_measures,
+            }
+        )
+
+        hhr_check.append(
+            {
+                "uprn": p.uprn,
+                "address": p.address,
+                "postcode": p.postcode,
+                "heating": p.main_heating["clean_description"],
+                "heating_controls": p.main_heating_controls["clean_description"],
+                "recommendation": hhr_recommendation,
+                "possible_measures": possible_measures,
+                "selected_measures": selected_measures,
+            }
+        )
+
+    walls_check = pd.DataFrame(walls_check)
+    hhr_check = pd.DataFrame(hhr_check)
+
+    return walls_check, hhr_check
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@ -52,3 +52,9 @@ class PlanTriggerRequest(BaseModel):
        if v not in cls._allowed_housing_types:
            raise ValueError(f"{v} is not a valid housing type")
        return v
+
+
+class MdsRequest(PlanTriggerRequest):
+    """
+    Request body for the MDS endpoint: a PlanTriggerRequest with an optional list of measures.
+    """
+    # When creating the mds report, we allow an optional list of measures to select from. If this is passed, it will
+    # cause the service to select the optimal package from the list of measures
+    # Defaults to None (no list supplied); when a list is supplied it must hold at least one measure name
+    measures: Optional[conlist(str, min_items=1)] = None
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@ -1,3 +1,17 @@
+import numpy as np
+
+# Energy prices per calendar quarter, most recent quarter first. Each entry carries the quarter's start and end
+# dates as ISO strings together with an "electricity" and a "gas" price.
+# NOTE(review): the 2024 Q1 gas value (0.042) is roughly ten times smaller than the 2023 gas values
+# (0.456 - 0.51), which looks like a units or decimal-place mismatch - TODO confirm against the price source
+QUARTERLY_ENERGY_PRICES = [
+    # 2024 Q1
+    {"start": "2024-01-01", "end": "2024-03-31", "electricity": 0.2, "gas": 0.042},
+    # 2023 Q4
+    {"start": "2023-10-01", "end": "2023-12-31", "electricity": 0.202, "gas": 0.51},
+    # 2023 Q3
+    {"start": "2023-07-01", "end": "2023-09-30", "electricity": 0.188, "gas": 0.46},
+    # 2023 Q2
+    {"start": "2023-04-01", "end": "2023-06-30", "electricity": 0.177, "gas": 0.456},
+]
+
+
 class AnnualBillSavings:
    """
    This is a simple class which will estimate the annual bill savings, based on the kwh savings.
@ -14,6 +28,8 @@ class AnnualBillSavings:
    # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
    ELECTRICITY_PRICE_CAP = 0.245
    GAS_PRICE_CAP = 0.0604
+    # This is the most recent export payment figure, at 12p per kwh
+    ELECTRICITY_EXPORT_PAYMENT = 0.12

    # This is a weighted mean of the price caps, using the consumption figures above as weights
    PRICE_FACTOR = 0.09549999999999999
@ -58,8 +74,58 @@ class AnnualBillSavings:

        return cls.ELECTRICITY_PRICE_CAP * kwh + (cls.DAILY_STANDARD_CHARGE_ELECTRICITY * 365)

+    @staticmethod
+    def calculate_occupants(total_floor_area):
+        """
+        Estimate the number of occupants of a dwelling from its total floor area.
+
+        Follows Table 1b of the SAP 2012 documentation https://bregroup.com/documents/d/bre-group/sap-2012_9-92
+        The estimate is used to calculate the amount of electricity used by appliances and during cooking.
+        :param total_floor_area: total floor area of the dwelling
+        :return: 1 for floor areas of 13.9 or less, otherwise the (fractional) estimated number of occupants
+        """
+
+        if total_floor_area <= 13.9:
+            return 1
+
+        # Floor area above the single-occupant threshold drives both terms of the estimate
+        excess_area = total_floor_area - 13.9
+        saturating_term = 1.76 * (1 - np.exp(-0.000349 * excess_area * excess_area))
+        linear_term = 0.0013 * excess_area
+
+        return 1 + (saturating_term + linear_term)
+
+    @staticmethod
+    def estimate_electrical_appliances(occupants, total_floor_area):
+        """
+        Estimate the amount of energy used by electrical appliances over a year.
+
+        From section L2 (Electrical appliances) of SAP2012
+        https://bregroup.com/documents/d/bre-group/sap-2012_9-92
+        An initial annual figure is derived from the floor area and the occupancy, spread over the twelve
+        months with a cosine seasonal weighting, and the monthly figures are then summed.
+        :param occupants: number of occupants, e.g. as estimated by calculate_occupants
+        :param total_floor_area: total floor area of the dwelling
+        :return: the sum of the twelve monthly appliance energy estimates
+        """
+        # NOTE(review): SAP 2012 is believed to use an exponent of 0.4714 here rather than 0.4717 - TODO confirm
+        # against the document before relying on the absolute figures
+        annual_baseline = 207.8 * np.power(total_floor_area * occupants, 0.4717)
+
+        # Days in each month of a 365 day year, January first
+        month_lengths = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
+
+        annual_total = 0
+        for month, n_days in enumerate(month_lengths, start=1):
+            seasonal_factor = 1 + 0.157 * np.cos(2 * np.pi * (month - 1.78) / 12)
+            annual_total += annual_baseline * seasonal_factor * n_days / 365
+
+        return annual_total
+
    @classmethod
-    def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
+    def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating, total_floor_area):
        """
        The over-prediction of energy use by EPCs in Great Britain: A comparison
        of EPC-modelled and metered primary energy use intensity
@ -70,6 +136,13 @@ class AnnualBillSavings:
        :return:
        """

+        # The EPC energy consumption does not factor in cooking and appliance use, so this is estimated using the
+        # methodology outlined in SAP, and is discussed in the UCL paper in section 3.1.1
+        estimated_occupants = cls.calculate_occupants(total_floor_area=total_floor_area)
+        appliances_energy_use = cls.estimate_electrical_appliances(estimated_occupants, total_floor_area)
+
+        epc_energy_consumption += appliances_energy_use
+
        gradients = {
            "A": -0.1,
            "B": -0.1,
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@ -90,6 +90,9 @@ class PropertyValuation:
        41222760: 46_000,  # Based on Zoopla
        41222761: 270_000,  # Based on Zoopla
        41212534: 38_000,  # Based on Zoopla
+        # Northern Group Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        10070868263: 194_000,  # Based on Zoopla
+        10070868244: 195_000,  # Based on Zoopla
    }

    # We base our valuation uplifts on a number of sources
--- a/backend/tests/test_annual_bill_savings.py
+++ b/backend/tests/test_annual_bill_savings.py
@ -0,0 +1,82 @@
+import numpy as np
+import pytest
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+
+appliance_consumption_cases = [
+    {
+        "total_floor_area": 13.9,
+        "n_occupants": 1,
+        "consumption": 718.4795859263703
+    },
+    {
+        "total_floor_area": 20,
+        "n_occupants": 1.0306381042556767,
+        "consumption": 865.2316409517844
+    },
+    {
+        "total_floor_area": 30,
+        "n_occupants": 1.1731577598127325,
+        "consumption": 1113.5965321501362
+    },
+    {
+        "total_floor_area": 50,
+        "n_occupants": 1.6901008890848956,
+        "consumption": 1683.31305074609
+    },
+    {
+        "total_floor_area": 75,
+        "n_occupants": 2.361158387531988,
+        "consumption": 2386.2935599981865
+    },
+    {
+        "total_floor_area": 100,
+        "n_occupants": 2.739525875076067,
+        "consumption": 2931.6076153011486
+    },
+    {
+        "total_floor_area": 125,
+        "n_occupants": 2.8807344137165405,
+        "consumption": 3335.143110751552
+    },
+    {
+        "total_floor_area": 150,
+        "n_occupants": 2.934188599837662,
+        "consumption": 3666.3228057866513
+    },
+    {
+        "total_floor_area": 200,
+        "n_occupants": 3.001920087128373,
+        "consumption": 4244.625403339813
+    },
+    {
+        "total_floor_area": 300,
+        "n_occupants": 3.1319299999993095,
+        "consumption": 5243.086106676302
+    },
+    {
+        "total_floor_area": 500,
+        "n_occupants": 3.39193,
+        "consumption": 6927.400500420533
+    },
+    {
+        "total_floor_area": 1000,
+        "n_occupants": 4.04193,
+        "consumption": 10434.755635642652
+    }
+]
+
+
+class TestAnnualBillSavings:
+    """Checks the occupancy and appliance consumption estimates against pre-computed values."""
+
+    @pytest.mark.parametrize(
+        "test_case",
+        appliance_consumption_cases
+    )
+    def test_appliance_estimation(self, test_case):
+        floor_area = test_case["total_floor_area"]
+
+        # Occupancy is derived from the floor area alone
+        estimated_occupants = AnnualBillSavings.calculate_occupants(floor_area)
+        assert np.isclose(estimated_occupants, test_case["n_occupants"])
+
+        # The appliance estimate is fed the occupancy calculated above, not the expected figure
+        estimated_consumption = AnnualBillSavings.estimate_electrical_appliances(
+            estimated_occupants, floor_area
+        )
+        assert np.isclose(estimated_consumption, test_case["consumption"])
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@ -0,0 +1,543 @@
+import pandas as pd
+import numpy as np
+import msgpack
+from xgboost import XGBRegressor
+from datetime import datetime
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
+from sklearn.feature_selection import RFECV
+from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_from_s3
+import logging
+from pprint import pprint
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+
+class EnergyConsumptionModel:
+    """
+    Trains, evaluates and applies one XGBoost regressor per energy consumption target.
+
+    TARGETS lists the supported targets, FEATURES maps each target to the input columns used for it and
+    CATEGORICAL_COLUMNS lists the columns that feature_engineering casts to strings and one-hot encodes.
+    """
+    # Input columns per target. The commented-out "heating_kwh" list is an earlier, smaller feature set kept for
+    # reference; only the uncommented lists are used.
+    FEATURES = {
+        # "heating_kwh": [
+        #     "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
+        #     "heating-cost-current",
+        #     "total-floor-area", "number-heated-rooms",
+        #     "mainheat-description", "mainheat-energy-eff", "main-fuel", "secondheat-description",
+        #     "property-type", "built-form", "mainheatcont-description", 'hotwater-description', 'hot-water-energy-eff',
+        #     # TESTING
+        #     # "walls-description",
+        #     "walls-energy-eff",
+        #     # "roof-description",
+        #     "roof-energy-eff",
+        #     # "floor-description",
+        #     # "county"
+        #     # "co2-emissions-current", - Made it worse
+        #     # TODO: Should hot water features go in here?
+        #     # , , "number-habitable-rooms",
+        #     #
+        #     #
+        #     #
+        # ],
+        "heating_kwh": [
+            "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
+            "heating-cost-current", "heating-cost-potential", "total-floor-area", "number-heated-rooms",
+            "mainheat-description", "mainheat-energy-eff", "main-fuel", "secondheat-description", "property-type",
+            "built-form", "mainheatcont-description", "hotwater-description", "hot-water-energy-eff",
+            "walls-energy-eff",
+            "roof-energy-eff", "windows-description", "windows-energy-eff", "floor-description", "flat-top-storey",
+            "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
+            "low-energy-lighting", "environment-impact-current", "energy-tariff",
+            "county", "construction-age-band", "co2-emissions-current"
+        ],
+        "hot_water_kwh": [
+            "lodgement-year", "lodgement-month",
+            "current-energy-efficiency",
+            "energy-consumption-current",
+            "hot-water-cost-current",
+            "total-floor-area", "number-heated-rooms",
+            "hotwater-description", "hot-water-energy-eff", "main-fuel", "property-type", "built-form",
+            "co2-emissions-current",
+        ]
+    }
+    # Names of the columns the models predict; each must be a key of FEATURES
+    TARGETS = ['heating_kwh', 'hot_water_kwh']
+    # Columns treated as categories. Every column listed here is one-hot encoded by feature_engineering, including
+    # those that appear in no FEATURES list (e.g. "number-habitable-rooms", "walls-description", "roof-description").
+    CATEGORICAL_COLUMNS = [
+        "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
+        "number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type", "built-form",
+        "construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
+        "walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
+        "county",
+        "windows-description", "windows-energy-eff", "flat-top-storey",
+        "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
+        "low-energy-lighting", "environment-impact-current", "energy-tariff"
+    ]
+
+    def __init__(self, cleaned, model_paths=None, n_jobs=1):
+        """
+        Set up empty per-target state and optionally load previously saved models.
+
+        :param cleaned: Lookup keyed by feature name, read by feature_engineering for the walls, roof and floor
+            descriptions
+        :param model_paths: Optional mapping of target name to the S3 key of a pickled model to load
+        :param n_jobs: Number of parallel jobs handed to RFECV in feature_selection
+        """
+        self.cleaned = cleaned
+        self.n_jobs = n_jobs
+        self.model_paths = model_paths or {}
+        self.models = {}
+
+        # Populated by read_dataset / feature_engineering
+        self.data = None
+        self.input_data = None
+        self.dummy_columns = None
+
+        # Per-target results, keyed by target name
+        self.training_predictions = {}
+        self.testing_predictions = {}
+        self.best_iteration = {}
+        self.selected_features = {}
+
+        # Per-target splits, keyed by target name
+        self.x_train = {}
+        self.x_val = {}
+        self.x_test = {}
+        self.y_train = {}
+        self.y_val = {}
+        self.y_test = {}
+
+        # Any feature that is not declared categorical is treated as numerical
+        every_feature = self.FEATURES["heating_kwh"] + self.FEATURES["hot_water_kwh"]
+        self.NUMERICAL_COLUMNS = list({name for name in every_feature if name not in self.CATEGORICAL_COLUMNS})
+
+        # Nothing is loaded when no paths were supplied, because the mapping is then empty
+        for target, path in self.model_paths.items():
+            self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
+
+    def read_dataset(self, file_path):
+        """Loads the parquet dataset at file_path from S3 and keeps an untouched copy in input_data."""
+        logging.info(f"Reading dataset from {file_path}")
+        frame = read_dataframe_from_s3_parquet(bucket_name="retrofit-data-dev", file_key=file_path)
+        self.data = frame
+        # Separate copy, so later changes to self.data do not alter the raw input
+        self.input_data = frame.copy()
+
+    def feature_engineering(self):
+        """
+        Performs feature engineering on the dataset.
+
+        Steps, all applied to self.data:
+        - derive "lodgement-year" and "lodgement-month" from "lodgement-date"
+        - for the walls, roof and floor descriptions, replace the description with a thermal transmittance band
+          wherever self.cleaned provides a transmittance value that falls into one of the bands
+        - convert the numerical columns with pd.to_numeric and cast the categorical columns to strings
+        - one-hot encode the categorical columns (dropping the first category of each)
+        - record, per target, the resulting model input columns in self.dummy_columns
+        """
+        logging.info("Starting feature engineering")
+        self.data["lodgement-date"] = pd.to_datetime(self.data["lodgement-date"])
+        self.data["lodgement-year"] = self.data["lodgement-date"].dt.year
+        self.data["lodgement-month"] = self.data["lodgement-date"].dt.month
+
+        # For walls, roof, floor description where we have average thermal transmittance, to avoid too many categories
+        # we group them
+        # Each band is (exclusive lower bound, inclusive upper bound)
+        ranges = {
+            "lessthan 0.1": (0, 0.1),
+            "0.1 - 0.3": (0.1, 0.3),
+            "0.3 - 0.5": (0.3, 0.5),
+            "morethan 0.5": (0.5, 2.5),
+        }
+
+        # Generate the lookup table: one row per value from 0.01 to 2.50 in steps of 0.01, mapped to its band label
+        thermal_transmittance_lookup_table = []
+        for i in range(1, 251):
+            value = i / 100
+            for label, (low, high) in ranges.items():
+                if low < value <= high:
+                    thermal_transmittance_lookup_table.append({"from": value, "to": label})
+                    break
+
+        # Convert to a DataFrame and use the string form of the value as the join key
+        thermal_transmittance_lookup_table = pd.DataFrame(thermal_transmittance_lookup_table)
+        thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
+
+        # Apply the lookup table to the data
+        # NOTE(review): both merges below are left merges on columns, so the result gets a fresh index and rows are
+        # duplicated if "original_description" is not unique in self.cleaned - confirm the index still lines up with
+        # self.input_data, which error_analysis relies on
+        for feature in ["walls-description", "roof-description", "floor-description"]:
+            cleaned_df = pd.DataFrame(self.cleaned[feature])[["original_description", "thermal_transmittance"]]
+            # Round to 2 decimal places and convert to string
+            cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
+
+            self.data = self.data.merge(
+                cleaned_df,
+                how="left",
+                left_on=feature,
+                right_on="original_description",
+            )
+            # We now have the thermal transmittance in the data, which we can use to group with the lookup table
+            self.data = self.data.merge(
+                thermal_transmittance_lookup_table,
+                how="left",
+                left_on="thermal_transmittance",
+                right_on="from",
+            )
+            # Where "to" is populated, replace feature with to; otherwise the original description is kept
+            self.data[feature] = np.where(
+                ~pd.isnull(self.data["to"]),
+                self.data["to"],
+                self.data[feature]
+            )
+            # Remove the helper columns so the next feature can be merged in the same way
+            self.data = self.data.drop(columns=["original_description", "thermal_transmittance", "from", "to"])
+
+        # Modify number of heated rooms and number of habitable rooms
+        # self.data["number-heated-rooms"] = self.data["number-heated-rooms"].apply(lambda x: "10+" if x > 10 else
+        # str(x))
+        # self.data["number-habitable-rooms"] = self.data["number-habitable-rooms"].apply(
+        #     lambda x: "10+" if x > 10 else str(x)
+        # )
+
+        # Convert data types
+        self.data[self.NUMERICAL_COLUMNS] = self.data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric)
+        self.data[self.CATEGORICAL_COLUMNS] = self.data[self.CATEGORICAL_COLUMNS].astype(str)
+
+        # Convert categorical columns to dummies
+        self.data = pd.get_dummies(self.data, columns=self.CATEGORICAL_COLUMNS, drop_first=True)
+
+        # Store the dummy columns: for each target, categorical features expand to their "<feature>_<value>" dummy
+        # columns and numerical features are kept under their own name
+        self.dummy_columns = {}
+        for target in self.TARGETS:
+            target_features = self.FEATURES[target]
+            dummy_feature_columns = []
+            for feature in target_features:
+                if feature in self.CATEGORICAL_COLUMNS:
+                    dummy_feature_columns.extend([col for col in self.data.columns if col.startswith(feature + '_')])
+                else:
+                    dummy_feature_columns.append(feature)
+            self.dummy_columns[target] = dummy_feature_columns
+
+        logging.info("Feature engineering completed")
+
+    def split_dataset(self, target, test_size=0.2, validation_size=0.2, random_state=42):
+        """
+        Splits the dataset into training, validation, and testing sets.
+
+        test_size and validation_size are both fractions of the full dataset. The resulting splits are stored
+        under the target name in x_train / x_val / x_test and y_train / y_val / y_test.
+        """
+        if target not in self.TARGETS:
+            raise ValueError(f"Target {target} not in {self.TARGETS}")
+
+        logging.info(f"Splitting dataset for target {target}")
+
+        features = self.data[self.dummy_columns[target]]
+        labels = self.data[target]
+
+        # First hold out the test set
+        x_remaining, x_test, y_remaining, y_test = train_test_split(
+            features, labels, test_size=test_size, random_state=random_state
+        )
+
+        # The validation fraction is expressed relative to the full dataset, so rescale it to the remainder
+        validation_share = validation_size / (1 - test_size)
+        x_train, x_val, y_train, y_val = train_test_split(
+            x_remaining, y_remaining, test_size=validation_share, random_state=random_state
+        )
+
+        self.x_train[target] = x_train
+        self.x_val[target] = x_val
+        self.x_test[target] = x_test
+        self.y_train[target] = y_train
+        self.y_val[target] = y_val
+        self.y_test[target] = y_test
+
+    def feature_selection(self, target, cv_folds=3, sample_fraction=0.1, random_state=42):
+        """
+        Selects features with recursive feature elimination (RFECV) around a small XGBoost model.
+
+        The chosen columns are stored in selected_features[target] and the train, test and validation feature
+        frames for the target are narrowed to them.
+
+        Parameters:
+        - target: The target variable for feature selection.
+        - cv_folds: Number of cross-validation folds.
+        - sample_fraction: Fraction of the training data to run the selection on; 1.0 or more uses all of it.
+        - random_state: Random state for reproducibility.
+        """
+        if target not in self.TARGETS:
+            raise ValueError(f"Target {target} not in {self.TARGETS}")
+
+        logging.info(f"Starting feature selection for target {target}")
+
+        x_sample = self.x_train[target]
+        y_sample = self.y_train[target]
+        if sample_fraction < 1.0:
+            # Keep only the requested share of the training rows for the selection run
+            x_sample, _, y_sample, _ = train_test_split(
+                x_sample, y_sample, train_size=sample_fraction, random_state=random_state
+            )
+
+        # One feature is eliminated per step, scored by negative MAPE
+        selector = RFECV(
+            self.init_model(feature_selection=True),
+            step=1,
+            cv=cv_folds,
+            scoring='neg_mean_absolute_percentage_error',
+            verbose=1,
+            n_jobs=self.n_jobs,
+        ).fit(x_sample, y_sample)
+
+        chosen_columns = x_sample.columns[selector.support_]
+        self.selected_features[target] = chosen_columns
+
+        # Narrow every split to the selected columns
+        for split in (self.x_train, self.x_test, self.x_val):
+            split[target] = split[target][chosen_columns]
+
+        logging.info(f"Feature selection completed for target {target}")
+
+    def init_model(self, feature_selection=False):
+        """
+        Builds an unfitted XGBRegressor.
+
+        With feature_selection=True a smaller model (50 trees) is returned for use inside RFECV; otherwise the
+        full model (1000 trees, min_child_weight of 3) is returned. n_jobs is not passed to either model.
+        """
+        # Hyper-parameters common to both configurations
+        shared_params = {
+            "objective": 'reg:squarederror',
+            "learning_rate": 0.05,
+            "max_depth": 6,
+            "subsample": 0.8,
+            "colsample_bytree": 0.8,
+            "reg_alpha": 0.1,
+            "reg_lambda": 0.1,
+        }
+
+        if feature_selection:
+            # Set up a smaller model to keep the repeated fits cheap
+            return XGBRegressor(n_estimators=50, **shared_params)
+
+        return XGBRegressor(n_estimators=1000, min_child_weight=3, **shared_params)
+
+    def fit_model(self, target):
+        """
+        Fits the model to the training data and removes zero-importance features.
+
+        The model is early-stopped on the validation split. If any feature ends up with an importance of zero,
+        those columns are dropped from the train, validation and test frames and the model is fitted once more.
+        The fitted model is stored in models[target] and its best iteration in best_iteration[target].
+        """
+
+        logging.info(f"Fitting model for target {target}")
+
+        def train_once():
+            # A fresh estimator, fitted on whatever columns the splits currently hold
+            estimator = self.init_model()
+            estimator.fit(
+                self.x_train[target],
+                self.y_train[target],
+                eval_set=[(self.x_val[target], self.y_val[target])],
+                early_stopping_rounds=50
+            )
+            return estimator
+
+        self.models[target] = train_once()
+
+        # Collect the columns the first fit never used
+        column_importances = zip(self.x_train[target].columns, self.models[target].feature_importances_)
+        zero_importance_features = [name for name, importance in column_importances if importance == 0]
+
+        if zero_importance_features:
+            logging.info(f"Removing zero-importance features for target {target}: {zero_importance_features}")
+
+            for split in (self.x_train, self.x_val, self.x_test):
+                split[target] = split[target].drop(columns=zero_importance_features)
+
+            # Re-fit the model with the reduced feature set
+            self.models[target] = train_once()
+
+        # Store the best iteration
+        self.best_iteration[target] = self.models[target].best_iteration
+
+        logging.info(f"Model fitting completed for target {target}")
+
+    def re_train_final_model(self, target):
+        """
+        Re-trains the final model on the combined training and validation set.
+
+        No validation data is left for early stopping at this point, so when fit_model has recorded a best
+        iteration for the target, the number of trees is capped at that point instead of always training the full
+        number of estimators. Without a recorded best iteration the default model from init_model is used.
+        The re-trained model replaces models[target].
+        """
+        logging.info(f"Re-training final model for target {target}")
+        x_train_val = pd.concat([self.x_train[target], self.x_val[target]])
+        y_train_val = pd.concat([self.y_train[target], self.y_val[target]])
+
+        final_model = self.init_model()
+
+        best_iteration = self.best_iteration.get(target)
+        if best_iteration is not None:
+            # best_iteration is a zero-based index, so the matching number of boosting rounds is one more
+            final_model.set_params(n_estimators=int(best_iteration) + 1)
+
+        final_model.fit(x_train_val, y_train_val, verbose=False)
+        self.models[target] = final_model
+        logging.info(f"Re-training final model completed for target {target}")
+
+    def evaluate_model(self, target):
+        """
+        Evaluates the model on training and testing data.
+
+        Stores a frame of actual values, predictions and absolute residuals per split in
+        training_predictions[target] and testing_predictions[target].
+
+        Returns a dictionary with a 'train' and a 'test' entry, each holding 'MSE', 'R2' and 'MAPE'. The 'train'
+        entry additionally holds 'Feature Importance', a frame sorted by descending importance.
+        """
+        logging.info(f"Evaluating model for target {target}")
+        fitted_model = self.models[target]
+
+        def score_split(features, actual):
+            # Predict one split and return its prediction frame together with the three metrics
+            predicted = fitted_model.predict(features)
+            predictions = pd.DataFrame({
+                'Actual': actual,
+                'Predicted': predicted
+            })
+            predictions["residual"] = abs(predictions["Actual"] - predictions["Predicted"])
+            metrics = {
+                'MSE': mean_squared_error(actual, predicted),
+                'R2': r2_score(actual, predicted),
+                'MAPE': mean_absolute_percentage_error(actual, predicted)
+            }
+            return predictions, metrics
+
+        self.training_predictions[target], train_metrics = score_split(self.x_train[target], self.y_train[target])
+        self.testing_predictions[target], test_metrics = score_split(self.x_test[target], self.y_test[target])
+
+        # Always label the importances with the columns the model was actually fitted on. feature_selection narrows
+        # x_train to the selected features, and fit_model may drop zero-importance columns afterwards, so
+        # selected_features can be longer than feature_importances_.
+        feature_importance = pd.DataFrame({
+            'Feature': self.x_train[target].columns,
+            'Importance': fitted_model.feature_importances_
+        }).sort_values(by='Importance', ascending=False)
+
+        logging.info(f"Evaluation completed for target {target}")
+
+        return {
+            'train': {
+                **train_metrics,
+                'Feature Importance': feature_importance
+            },
+            'test': test_metrics
+        }
+
+    def save_model(self, target):
+        """Pickles the model for the target to S3, with today's date in the file name."""
+        logging.info(f"Saving model for target {target}")
+        run_date = datetime.now().strftime("%Y-%m-%d")
+        destination = f"model_directory/energy_consumption_model/{target}_{run_date}.pkl"
+        save_pickle_to_s3(self.models[target], bucket_name="retrofit-model-directory-dev", s3_file_name=destination)
+
+    def score_new_data(self, new_data, target):
+        """
+        Scores new data using the trained model.
+
+        Raises ValueError when no model is available for the target; otherwise the data is passed through
+        transform_new_data and the model's predictions are returned.
+        """
+        if target in self.models:
+            prepared = self.transform_new_data(new_data, target)
+            return self.models[target].predict(prepared)
+
+        raise ValueError(f"Model for target {target} not loaded or trained")
+
+    def transform_new_data(self, new_data, target):
+        """
+        Applies the same transformations to new data as were applied to the training data.
+
+        The input frame is left untouched; a transformed copy is returned whose columns are the ones the model for
+        the target was fitted on.
+
+        Parameters:
+        - new_data: Frame with a "lodgement-date" column and every column in CATEGORICAL_COLUMNS apart from
+          "lodgement-year" and "lodgement-month", which are derived here.
+        - target: The target whose training columns the data is aligned to.
+
+        Raises ValueError when the training dummy columns are not available.
+        """
+        if self.dummy_columns is None:
+            raise ValueError("Training dummy columns are not available - run feature_engineering first")
+
+        # TODO: This should just use our other transformation function. In particular the thermal transmittance
+        # grouping from feature_engineering is not applied here.
+        # Work on a copy so the caller's frame is not modified
+        new_data = new_data.copy()
+        new_data["lodgement-date"] = pd.to_datetime(new_data["lodgement-date"])
+        new_data["lodgement-year"] = new_data["lodgement-date"].dt.year
+        new_data["lodgement-month"] = new_data["lodgement-date"].dt.month
+
+        # Training casts the categorical columns to strings before encoding; do the same so that the dummy column
+        # names (including the one for missing values) match
+        new_data[self.CATEGORICAL_COLUMNS] = new_data[self.CATEGORICAL_COLUMNS].astype(str)
+
+        # Convert categorical columns to dummies. drop_first must be False here: it would drop the first category
+        # present in the new data (for a single row, its only category) rather than the one dropped in training.
+        # The reindex below discards any dummy column that training did not keep.
+        new_data = pd.get_dummies(new_data, columns=self.CATEGORICAL_COLUMNS, drop_first=False)
+
+        # Align new data with the dummy columns from training data
+        new_data = new_data.reindex(columns=self.dummy_columns[target], fill_value=0)
+
+        # Select the features used by the model. The training frame reflects both feature selection and the
+        # zero-importance pruning in fit_model, so it takes precedence when available.
+        if target in self.x_train:
+            model_columns = self.x_train[target].columns
+        elif target in self.selected_features:
+            model_columns = self.selected_features[target]
+        else:
+            model_columns = self.dummy_columns[target]
+
+        return new_data[model_columns]
+
+    def error_analysis(self, target, top_n=10, unique_threshold=0.8):
+        """
+        Perform error analysis on the model for the target, using its stored train and test splits.
+
+        Parameters:
+        - target: The target variable to analyze.
+        - top_n: Number of top residuals to consider for analysis.
+        - unique_threshold: Object columns whose share of unique values among the top training rows reaches this
+          threshold are excluded.
+
+        Returns:
+        - summary: Dictionary with the top rows per split ("top_train_data", "top_test_data"), their describe()
+          summaries ("train_summary", "test_summary") and their correlations with the residuals ("train_corr",
+          "test_corr").
+        """
+
+        # Calculate predictions and residuals
+        y_train_pred = self.models[target].predict(self.x_train[target])
+        y_test_pred = self.models[target].predict(self.x_test[target])
+
+        train_residuals = self.y_train[target] - y_train_pred
+        test_residuals = self.y_test[target] - y_test_pred
+
+        # Identify top N poorly performing rows by absolute residuals
+        top_train_indices = train_residuals.abs().nlargest(top_n).index
+        top_test_indices = test_residuals.abs().nlargest(top_n).index
+
+        # Look the rows up in the untransformed input data
+        # NOTE(review): this assumes the index labels of the splits still match self.input_data - confirm
+        top_train_data = self.input_data.loc[top_train_indices]
+        top_test_data = self.input_data.loc[top_test_indices]
+
+        # Automatically detect and exclude columns
+        def exclude_columns(data, threshold):
+            # Collect object columns whose ratio of unique values to rows is at least the threshold
+            exclude_cols = []
+            num_rows = data.shape[0]
+            for col in data.columns:
+                if data[col].dtype == 'object' and data[col].nunique() / num_rows >= threshold:
+                    exclude_cols.append(col)
+            return exclude_cols
+
+        # The exclusion list is derived from the training rows only and applied to both sets
+        exclude_cols = exclude_columns(top_train_data, unique_threshold)
+
+        top_train_data = top_train_data.drop(columns=exclude_cols)
+        top_test_data = top_test_data.drop(columns=exclude_cols)
+
+        # One-hot encode categorical variables
+        categorical_columns = top_train_data.select_dtypes(include=['object']).columns.tolist()
+        top_train_data_encoded = pd.get_dummies(top_train_data, columns=categorical_columns, drop_first=True)
+        top_test_data_encoded = pd.get_dummies(top_test_data, columns=categorical_columns, drop_first=True)
+
+        # Ensure all original columns are included in the encoded data
+        # NOTE(review): reindexing to the original column names keeps only columns named like the raw input, so
+        # the dummy columns created above look to be discarded and the encoded or excluded originals come back
+        # filled with 0 - confirm this is intended
+        top_train_data_encoded = top_train_data_encoded.reindex(columns=self.input_data.columns, fill_value=0)
+        top_test_data_encoded = top_test_data_encoded.reindex(columns=self.input_data.columns, fill_value=0)
+
+        # Correlation analysis with residuals
+        train_corr = top_train_data_encoded.corrwith(train_residuals.loc[top_train_indices])
+        test_corr = top_test_data_encoded.corrwith(test_residuals.loc[top_test_indices])
+
+        # Return summaries
+        summary = {
+            "train_summary": top_train_data.describe(include='all').T,
+            "test_summary": top_test_data.describe(include='all').T,
+            "train_corr": train_corr,
+            "test_corr": test_corr,
+            "top_train_data": top_train_data,
+            "top_test_data": top_test_data
+        }
+
+        return summary
+
+
+# Usage:
+# NOTE(review): everything below runs at import time (S3 reads and two full training runs) - consider moving it
+# behind an `if __name__ == "__main__":` guard
+# Load the cleaned lookup used by feature_engineering; the stored file is decoded with msgpack
+cleaned = read_from_s3(
+    s3_file_name="cleaned_epc_data/cleaned.bson",
+    bucket_name="retrofit-data-dev"
+)
+
+cleaned = msgpack.unpackb(cleaned, raw=False)
+
+model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
+model.read_dataset('energy_consumption/2024-07-05/energy_consumption_dataset.parquet')
+model.feature_engineering()
+
+# For heating_kwh
+# feature_selection is not run here, so the model uses every engineered column for the target (minus any
+# zero-importance columns removed by fit_model)
+model.split_dataset(target='heating_kwh')
+model.fit_model(target='heating_kwh')
+model.re_train_final_model(target='heating_kwh')
+evaluation_results = model.evaluate_model(target='heating_kwh')
+
+pprint(evaluation_results["train"])
+pprint(evaluation_results["test"])
+
+# Pull out the importances and the predictions, worst residuals first, for manual inspection
+importance_df = evaluation_results["train"]["Feature Importance"]
+testing_predictions = model.testing_predictions["heating_kwh"]
+testing_predictions = testing_predictions.sort_values("residual", ascending=False)
+training_predictions = model.training_predictions["heating_kwh"]
+training_predictions = training_predictions.sort_values("residual", ascending=False)
+# Merge on model.input_data, by the index
+merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True)
+merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True)
+
+# For hot_water_kwh
+# evaluation_results is overwritten with the hot water results from here on
+model.split_dataset(target='hot_water_kwh')
+model.fit_model(target='hot_water_kwh')
+model.re_train_final_model(target='hot_water_kwh')
+evaluation_results = model.evaluate_model(target='hot_water_kwh')
+pprint(evaluation_results["train"])
+pprint(evaluation_results["test"])
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@ -0,0 +1,184 @@
+import time
+from datetime import datetime, timedelta
+from dateutil.relativedelta import relativedelta
+
+import requests
+import inspect
+import pandas as pd
+from tqdm import tqdm
+from bs4 import BeautifulSoup
+from etl.epc.settings import EARLIEST_EPC_DATE
+from pathlib import Path
+import numpy as np
+from utils.s3 import save_pickle_to_s3
+
+# Path of this source file, obtained by asking inspect where a lambda defined here lives
+src_file_path = inspect.getfile(lambda: None)
+
+# Local folder holding one sub-directory per EPC extract, each with a certificates.csv (see app below)
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+# Postcode search page; "{postcode_input}" is filled in with the postcode, spaces replaced by "+"
+SEARCH_POSTCODE_URL = (
+    "https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode={postcode_input}"
+)
+# Prefix for the relative certificate links found on the search page
+BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
+
+
+def calculate_expiry_date(lodgement_date):
+    """
+    Work out the expiry date of a certificate from its lodgement date.
+
+    The expiry date is ten years after the lodgement date, less one day, formatted as the day without a leading
+    zero followed by the full month name and the year (e.g. "4 July 2034").
+
+    :param lodgement_date: The lodgement date as a 'YYYY-MM-DD' string
+    :return: The formatted expiry date string
+    """
+    lodgement_date_dt = datetime.strptime(lodgement_date, '%Y-%m-%d')
+    expiry_date_dt = lodgement_date_dt + relativedelta(years=10) - timedelta(days=1)
+    # '%-d' is a platform-specific strftime extension that is not available on Windows, so the un-padded day is
+    # taken from the date itself
+    return f"{expiry_date_dt.day} {expiry_date_dt.strftime('%B %Y')}"
+
+
+def retrieve_find_my_epc_data(uprn: int, postcode: str, address: str, expected_expiry_date: str):
+    """
+    For a post code and address, we pull out all the required data from the find my epc website
+
+    The postcode search page is scanned for rows whose address starts with the given address (ignoring commas,
+    spaces and case) and whose expiry date equals expected_expiry_date. When exactly one row matches, its
+    certificate page is fetched and the ratings and bill texts are extracted.
+
+    :param uprn: The property's UPRN, passed through to the result
+    :param postcode: The postcode to search for
+    :param address: The address to match the search results against
+    :param expected_expiry_date: The expiry date the certificate must have, formatted as on the website
+    :return: A dictionary with the extracted data, or None when the property is skipped (request failure, no or
+        multiple candidates, or a page that does not have the expected content)
+    """
+
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                      'Chrome/111.0.0.0 Safari/537.36'
+    }
+    # Without a timeout a stalled connection would block the whole collection run
+    timeout_seconds = 30
+
+    postcode_input = postcode.replace(" ", "+")
+    postcode_search = SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
+    try:
+        postcode_response = requests.get(postcode_search, headers=headers, timeout=timeout_seconds)
+    except requests.RequestException as error:
+        print(f"Postcode search request failed ({error}), skipping for now")
+        return None
+
+    address_cleaned = address.replace(",", "").replace(" ", "").lower()
+    postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
+    rows = postcode_res.find_all('tr', class_='govuk-table__row')
+
+    extracted_table = []
+    for row in rows:
+        # Extract the address and URL; rows without a link (e.g. the header row) are ignored
+        address_tag = row.find('a', class_='govuk-link')
+        if address_tag is None:
+            continue
+
+        extracted_address = address_tag.text.strip()
+        extracted_address_url = address_tag['href']
+
+        extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower()
+        if not extracted_address_cleaned.startswith(address_cleaned):
+            continue
+
+        # The address is a match, so extract the expiry date (left as None when the row does not show one)
+        expiry_date = None
+        expiry_date_tag = row.find('td', class_='govuk-table__cell date')
+        if expiry_date_tag is not None:
+            expiry_date_span = expiry_date_tag.parent.find('span')
+            if expiry_date_span is not None:
+                expiry_date = expiry_date_span.text.strip()
+
+        extracted_table.append(
+            {
+                "extracted_address": extracted_address,
+                "extracted_address_url": extracted_address_url,
+                "expiry_date": expiry_date
+            }
+        )
+
+    extracted_table = [entry for entry in extracted_table if entry['expiry_date'] == expected_expiry_date]
+
+    if len(extracted_table) > 1:
+        print("Multiple candidates found, skipping for now")
+        return None
+
+    if not extracted_table:
+        print("No candidates found, skipping for now")
+        return None
+
+    chosen_epc = BASE_ENERGY_URL + extracted_table[0]['extracted_address_url']
+    epc_certificate = chosen_epc.split('/')[-1]
+
+    try:
+        address_response = requests.get(chosen_epc, headers=headers, timeout=timeout_seconds)
+    except requests.RequestException as error:
+        print(f"Certificate request failed ({error}), skipping for now")
+        return None
+    address_res = BeautifulSoup(address_response.text, features="html.parser")
+
+    ratings_tag = address_res.find('desc', {'id': 'svg-desc'})
+    bills = address_res.find('div', {'id': 'bills-affected'})
+    if ratings_tag is None or bills is None:
+        print("Certificate page does not have the expected sections, skipping for now")
+        return None
+
+    # Retrieve the energy consumption: the first list item is used for heating and the second for hot water
+    bills_list = bills.find_all('li')
+    if len(bills_list) < 2:
+        return None
+    heating_text = bills_list[0].text
+    hot_water_text = bills_list[1].text
+
+    # The description holds one sentence for the current and one for the potential rating. Within each sentence
+    # the rating is the sixth word from the end and the efficiency score is the last word.
+    try:
+        rating_sentences = ratings_tag.text.split(".")
+        current_rating = rating_sentences[0]
+        potential_rating = rating_sentences[1]
+        current_epc_rating = current_rating.split(' ')[-6]
+        current_epc_efficiency = int(current_rating.split(' ')[-1])
+        potential_epc_rating = potential_rating.split(' ')[-6]
+        potential_epc_efficiency = int(potential_rating.split(' ')[-1])
+    except (IndexError, ValueError):
+        print("Could not parse the ratings description, skipping for now")
+        return None
+
+    resulting_data = {
+        'extracted_uprn': uprn,
+        'extracted_address': address,
+        'epc_certificate': epc_certificate,
+        'current_epc_rating': current_epc_rating,
+        'current_epc_efficiency': current_epc_efficiency,
+        'potential_epc_rating': potential_epc_rating,
+        "potential_epc_efficiency": potential_epc_efficiency,
+        "heating_text": heating_text,
+        "hot_water_text": hot_water_text,
+    }
+
+    return resulting_data
+
+
+def app(start_index=36, sample_size=100):
+    """
+    This application is tasked with pulling a large quantity of data from the find my epc website, containing the
+    estimated energy consumption for properties
+
+    For every directory under EPC_DIRECTORY a random sample of properties is looked up on the website, and all
+    successfully retrieved records are pickled to S3 in a single file at the end of the run.
+
+    :param start_index: Number of leading directories to skip, e.g. because an earlier run already covered them
+    :param sample_size: Maximum number of properties to look up per directory
+    :return:
+    """
+
+    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+
+    energy_consumption_data = []
+    for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
+        # Skip the first start_index directories
+        if i < start_index:
+            continue
+
+        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        # Rename the columns to the same format as the api returns
+        data.columns = [c.replace("_", "-").lower() for c in data.columns]
+        # Keep only certificates lodged on or after the earliest EPC date
+        data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+        data = data[~pd.isnull(data["uprn"])]
+        # Take just the newest EPC per uprn, based on lodgement-date
+        data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
+
+        # Sampling more rows than are available raises, so cap the sample at the number of remaining rows
+        data = data.sample(min(sample_size, len(data)))
+        # We use the address data to find the related information
+
+        collected_data = []
+        for _, property_data in data.iterrows():
+            # Random pause between lookups so the requests are spread out
+            time.sleep(np.random.uniform(0.3, 2))
+
+            uprn = int(property_data["uprn"])
+            address = property_data["address1"]
+            postcode = property_data["postcode"]
+            expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
+
+            response = retrieve_find_my_epc_data(
+                uprn=uprn,
+                postcode=postcode,
+                address=address,
+                expected_expiry_date=expected_expiry_date
+            )
+            if response is None:
+                continue
+            collected_data.append(
+                {
+                    **response,
+                    "epc": property_data.to_dict(),
+                    "epc_directory": str(directory)
+                }
+            )
+
+        energy_consumption_data.extend(collected_data)
+
+    # Store the pickle in s3, named after the time of saving
+    save_time = datetime.now()
+    save_pickle_to_s3(
+        energy_consumption_data, bucket_name="retrofit-datalake-dev",
+        s3_file_name=f"energy_consumption_data/{save_time}.pkl"
+    )
--- a/etl/bill_savings/data_combining.py
+++ b/etl/bill_savings/data_combining.py
@ -0,0 +1,93 @@
+import re
+from datetime import datetime
+from tqdm import tqdm
+
+import pandas as pd
+
+from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
+
+# These columns are coerced to strings before saving
+PROBLEMATIC_COLUMNS = ["main-heating-controls", "floor-level"]
+
+
+def extract_kwh_value(text):
+    """
+    Extract the numerical kWh value from a given string.
+
+    :param text: The input string containing the kWh value, e.g. "7,000 kWh per year". Anything that is not a
+        string (such as None for a missing field) is treated as containing no value.
+    :return: The extracted numerical kWh value as an integer, or None when no value can be found.
+    """
+    if not isinstance(text, str):
+        # re.search raises a TypeError on None, so a missing field is reported the same way as an unmatched string
+        return None
+
+    # The number must start with a digit so that a stray comma on its own is never captured, which would otherwise
+    # leave an empty string for int() to fail on
+    match = re.search(r'(\d[\d,]*) kWh per year', text)
+    if match is None:
+        return None
+
+    # Remove the thousands separators before converting to an integer
+    return int(match.group(1).replace(',', ''))
+
+
+def app():
+    """
+    Collate every energy consumption pickle stored in our s3 datalake into a single de-duplicated dataset and
+    write it back to s3 as a parquet file for analysis
+    :return:
+    """
+
+    # Each file listed under this folder is loaded below as the pickled output of one collection run
+    data_files = list_files_in_s3_folder(bucket_name="retrofit-datalake-dev", folder_name="energy_consumption_data")
+
+    run_date = datetime.now().strftime("%Y-%m-%d")
+
+    complete_data = []
+    for file_key in tqdm(data_files):
+        # The part of the file name before the first "." is parsed as the timestamp of that run
+        file_stem = file_key.split("/")[-1].split(".")[0]
+        dataset_run_date = pd.Timestamp(file_stem)
+
+        records = read_pickle_from_s3(bucket_name="retrofit-datalake-dev", s3_file_name=file_key)
+
+        for record in records:
+            epc_data = record["epc"]
+            epc_sap = epc_data["current-energy-efficiency"]
+            epc_potential_sap = epc_data["potential-energy-efficiency"]
+
+            # Only keep records whose retrieved current and potential SAP scores both agree with the EPC data
+            scores_agree = (
+                int(epc_sap) == int(record["current_epc_efficiency"])
+                and int(epc_potential_sap) == int(record["potential_epc_efficiency"])
+            )
+            if not scores_agree:
+                continue
+
+            row = dict(epc_data)
+            row["heating_kwh"] = extract_kwh_value(record["heating_text"])
+            row["hot_water_kwh"] = extract_kwh_value(record["hot_water_text"])
+            row["dataset_run_date"] = dataset_run_date
+            complete_data.append(row)
+
+    df = pd.DataFrame(complete_data)
+
+    # Several runs are collated here, so the same uprn can appear more than once; keep only the row that came
+    # from the newest run
+    newest_first = df.sort_values("dataset_run_date", ascending=False)
+    df = newest_first.drop_duplicates(subset="uprn", keep="first")
+    df = df.drop(columns=["dataset_run_date"])
+
+    # Coerce the problematic columns to strings before saving
+    for column in PROBLEMATIC_COLUMNS:
+        df[column] = df[column].astype(str)
+
+    # Save the data back to s3, but this time as a parquet file
+    save_dataframe_to_s3_parquet(
+        df=df,
+        bucket_name="retrofit-data-dev",
+        file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet",
+    )
--- a/etl/costs/app.py
+++ b/etl/costs/app.py
@ -7,10 +7,13 @@ from sqlalchemy.orm import Session
 from sqlalchemy import create_engine
 from backend.app.db.models.materials import Material
 from recommendations.recommendation_utils import calculate_r_value_per_mm
+import inspect

-DATA_DIRECTORY = Path(__file__).parent / "local_data" / "Hestia Materials.xlsx"
+src_file_path = inspect.getfile(lambda: None)
+
+DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20240626 Hestia Materials.xlsx"
 # Environment file is at the same level as this file
-ENV_FILE = Path(__file__).parent / "etl" / "costs" / ".env"
+ENV_FILE = Path(src_file_path).parent / "etl" / "costs" / ".env"
 dotenv.load_dotenv(ENV_FILE)

 DB_USERNAME = os.getenv('DB_USERNAME')
@ -87,7 +90,8 @@ def app():
            solid_floor_costs,
            ewi_costs,
            lel_costs,
-            flat_roof_costs
+            flat_roof_costs,
+            window_costs
        ]
    )

--- a/etl/customers/eon/deck_examples.py
+++ b/etl/customers/eon/deck_examples.py
@ -0,0 +1,27 @@
+"""
+This script contains code snippets for examples to be included in the deck
+"""
+
+from backend.SearchEpc import SearchEpc
+from dotenv import load_dotenv
+import os
+
+# Load environment variables from the backend .env file (path is relative to the working directory)
+load_dotenv(dotenv_path="backend/.env")
+
+# Token passed to SearchEpc below; os.getenv returns None when the variable is not set
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+# Searcher for the single example property used in the deck. No Ordnance Survey key is supplied.
+searcher = SearchEpc(
+    address1="108 Blacklands",
+    postcode="ME19 6DP",
+    auth_token=EPC_AUTH_TOKEN,
+    os_api_key="",
+    property_type=None,
+    fast=False,
+)
+
+# Estimate an EPC for the example property as a detached bungalow.
+# NOTE(review): lmks_to_drop presumably lists EPC LMK keys to leave out of the estimate, and exclude_old
+# presumably filters out older certificates - confirm against SearchEpc.estimate_epc
+res = searcher.estimate_epc(
+    property_type="Bungalow",
+    built_form="Detached",
+    lmks_to_drop=["849273656952012102323315196229804"],
+    exclude_old=True
+)
--- a/etl/customers/eon/pilot_asset_list.py
+++ b/etl/customers/eon/pilot_asset_list.py
@ -64,7 +64,7 @@ def extract_mds_measures(config):
        measures.append({"district_heating_networks": "District heating networks"})

    if not pd.isnull(config["Elec Storage Htrs (Out of scope -Prov sum only)"]):
-        measures.append({"electric_storage_heaters": "Elec Storage Htrs (Out of scope -Prov sum only)"})
+        measures.append({"high_heat_retention_storage_heaters": "Elec Storage Htrs (Out of scope -Prov sum only)"})

    if not pd.isnull(config["Low Energy Bulbs"]):
        measures.append({"low_energy_lighting": "Low Energy Bulbs"})
@ -229,7 +229,8 @@ def app():
            "35a High Street",
            "35b High Street",
            "Flat Over 20 Holborough Road",
-            "Flat above 7 Malling Road"
+            "Flat above 7 Malling Road",
+            "Cobnut Barn",
        ]:
            print(config["Address"])
            uprn = None
@ -269,3 +270,33 @@ def app():
        "budget": None,
    }
    print(body)
+
+    # Optimised version where we specify the measures
+    measures = [
+        "external_wall_insulation",
+        "cavity_wall_insulation",
+        "loft_insulation",
+        "air_source_heat_pump",
+        "high_heat_retention_storage_heaters",
+        "solar_pv"
+    ]
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "measures": measures,
+        "budget": None,
+    }
+
+
+# Collect the "DPA" entry of every result into a dataframe.
+# NOTE(review): this runs at module level, where no `self` is defined in the visible code, so it looks like a
+# debugging snippet lifted out of a class method and would raise a NameError when the module is executed -
+# confirm whether it should be removed or moved
+output = []
+for r in self.results:
+    output.append(r["DPA"])
+
+output = pd.DataFrame(output)
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@ -3,6 +3,7 @@ import pandas as pd
 from tqdm import tqdm
 import Levenshtein
 from backend.SearchEpc import SearchEpc
+from utils.s3 import read_dataframe_from_s3_parquet

 # Average value of a property in the midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume
 # £207,000 since they trade at a discount. This is based on the rightmove study where moving from an EPC F/G -> C has a
@ -248,6 +249,13 @@ def app():
    """
    This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
    """
+
+    # TODO: This property:
+    #       https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
+    #       =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
+    #       is actually listed in two local authorities causing us to think it's an EPC F & G property, but it's
+    #       actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating together
+    #       and performing a singular filter for most recent EPC by UPRN
    # paths = [
    #     "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
    #     "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
@ -477,6 +485,35 @@ def app():
    portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
    portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)

+    # We check if any of these properties are in a conservation area
+    valuations = pd.read_excel("property value.xlsx")
+
+    uprn_filenames = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
+    )
+
+    geospatial_data = []
+    for _, row in tqdm(valuations.iterrows(), total=len(valuations)):
+        filtered_df = uprn_filenames[
+            (uprn_filenames["lower"] <= row["UPRN"])
+            & (uprn_filenames["upper"] >= row["UPRN"])
+            ]
+        if filtered_df.empty:
+            raise Exception("No match found")
+
+        filename = filtered_df.iloc[0]["filenames"]
+
+        spatial_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+        )
+        spatial = spatial_data[
+            spatial_data["UPRN"] == row["UPRN"]
+            ][["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]]
+        geospatial_data.append(spatial.to_dict("records")[0])
+
+    geospatial_data = pd.DataFrame(geospatial_data)
+    geospatial_data.to_excel("geospatial_data.xlsx", index=False)
+

 def company_aggregation():
    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
@ -490,3 +527,79 @@ def company_aggregation():
    aggregation = aggregation.sort_values("Number of Properties", ascending=False)

    aggregation.to_excel("Company ownership aggregation.xlsx")
+
+
+def prepare_anonymised_data():
+    """
+    Build the partially redacted property list: join the 50m investment properties to their EPC details and
+    valuations, swap each owner for an "OwnerN" label and redact the address and postcode on every owner's rows
+    after the first half (rounded up), then write the result to excel
+    """
+    properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0)
+    epc_details = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0)
+    valuations = pd.read_excel("property value.xlsx", header=0)
+
+    # EPC fields to bring across, mapped to the names used in the output
+    epc_columns = {
+        "PROPERTY_TYPE": "Property Type",
+        "BUILT_FORM": "Property Archetype",
+        "TOTAL_FLOOR_AREA": "Total Floor Area",
+        "LODGEMENT_DATE": "Date EPC Lodged",
+        "POSTCODE": "Postcode on EPC"
+    }
+    epc_subset = epc_details[["UPRN", *epc_columns]].rename(columns=epc_columns)
+    df = properties.merge(epc_subset, how="inner", on="UPRN")
+
+    valuation_subset = valuations.drop(columns=["ADDRESS", "POSTCODE"]).rename(
+        columns={
+            "Zoopla Valuation": "Expected Valuation",
+            "Zoopla Lower Bound": "Valuation - Lower Bound",
+            "Zoopla Upper Bound": "Valuation - Upper Bound",
+        }
+    )
+    df = df.merge(valuation_subset, how="inner", on="UPRN")
+
+    df = df.rename(
+        columns={
+            "CURRENT_ENERGY_RATING": "Current EPC",
+            "CURRENT_ENERGY_EFFICIENCY": "Current SAP Score",
+            "epc_address": "Address on EPC"
+        }
+    )
+    df = df.drop(columns=["Title Number", "match_type", "UPRN"])
+
+    # Give every distinct company registration number its own anonymous label
+    owner_key = "Company Registration No. (1)"
+    redacted_owner_names = df[[owner_key]].drop_duplicates()
+    redacted_owner_names["Owner"] = [f"Owner{i}" for i in range(1, len(redacted_owner_names) + 1)]
+    df = df.merge(redacted_owner_names, how="left", on=owner_key)
+
+    df = df.drop(columns=[owner_key, "Proprietor Name (1)", "Property Address"])
+    df = df.sort_values(["Owner", "Date EPC Lodged"], ascending=False)
+
+    # For each owner the first half of their rows (rounded up) stays visible, the remainder is redacted
+    redacted_index = []
+    for _, owner_properties in df.groupby("Owner"):
+        n_properties = len(owner_properties)
+        n_visible = round(n_properties / 2 + 0.00001)
+        redacted_index.extend(owner_properties.index[n_visible:].tolist())
+
+    import numpy as np
+    # Redact addresses and postcodes
+    is_redacted = df.index.isin(redacted_index)
+    for column in ("Address on EPC", "Postcode on EPC"):
+        df[column] = np.where(is_redacted, "Redacted", df[column])
+
+    df.to_excel("Property List - 50% redacted.xlsx", index=False)
--- a/etl/customers/lhp/30_may_2024_data_pull.py
+++ b/etl/customers/lhp/30_may_2024_data_pull.py
@ -0,0 +1,148 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    Pull EPC data for some properties owned by LHP and write the enriched asset list to excel.
+
+    Each address in the asset list is looked up with SearchEpc, the newest EPC found is merged back onto the
+    asset list and a handful of geometry estimates are derived from it. Properties for which no EPC is found
+    stay in the output (left merge) with empty EPC columns.
+    :return:
+    """
+    # asset_list = read_excel_from_s3(
+    #     bucket_name="retrofit-datalake-dev",
+    #     file_key="customers/guiness/TGP CW Properties PV.xlsx",
+    #     header_row=0
+    # )
+    asset_list = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Echo4 3.4.24.xlsx", header=0)
+
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        # The first comma separated part is used as the first address line and the last part as the postcode
+        full_address = home["ADDRESS"]
+        address_split = full_address.split(",")
+        address1 = address_split[0].strip()
+        postcode = address_split[-1].strip()
+
+        searcher = SearchEpc(
+            address1=address1,
+            postcode=postcode,
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        epc = {
+            "asset_list_address": full_address,
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description"
+        ]
+    ]
+
+    # Left merge so that properties without an EPC are still present in the output
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["ADDRESS"],
+        right_on=["asset_list_address"]
+    )
+
+    asset_list = asset_list.drop(columns=["asset_list_address"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC"
+    })
+
+    # Rows without an EPC have a null property type after the left merge, so there is nothing to estimate from
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(
+            property_type=x["Property Type"]
+        ) if not pd.isnull(x["Property Type"]) else None, axis=1
+    )
+
+    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
+    # A blank string cannot be cast to float, so treat it as a missing value first
+    habitable_rooms = asset_list["Number of Habitable Rooms"]
+    asset_list["Number of Habitable Rooms"] = habitable_rooms.mask(habitable_rooms == "").astype(float)
+
+    # Floor area and room count are split evenly across the estimated number of floors
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+
+    # NOTE(review): the column is labelled as a perimeter but is filled by estimate_external_wall_area - confirm
+    # which quantity is intended. A falsy floor height (e.g. a blank string) falls back to 2.5.
+    asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ),
+        axis=1
+    )
+
+    # Rows without an EPC have no roof description to parse
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()[
+            "insulation_thickness"] if not pd.isnull(x["Roof Construction"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    filename = "LHP EPC Data pull.xlsx"
+    asset_list.to_excel(filename, index=False)
--- a/etl/customers/northern_gorup/test_asset_list.py
+++ b/etl/customers/northern_gorup/test_asset_list.py
@ -0,0 +1,43 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 81
+
+
+def app():
+    """
+    Store a two property test asset list in s3 and print the request body that refers to it
+    """
+    flats = [
+        (10070868263, "Apartment 307, Flint Glass Wharf", "M4 6AD"),
+        (10070868244, "Apartment 106, Flint Glass Wharf", "M4 6AD"),
+    ]
+    asset_list = pd.DataFrame(flats, columns=["uprn", "address", "postcode"])
+
+    # Store the asset list in s3, keyed by user and portfolio
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename,
+        dataframe=asset_list,
+    )
+
+    # Request body pointing at the file stored above
+    body = {
+        "portfolio_id": f"{PORTFOLIO_ID}",
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
--- a/etl/customers/places_for_people/EPC
+++ b/etl/customers/places_for_people/EPC
@ -0,0 +1,156 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+import numpy as np
+
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    This app pulls EPC data for the Places for People North West asset list and writes the enriched list to excel
+    :return:
+    """
+
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/Places for People NORTH WEST - EPC DATA PULL REQUEST.xlsx", header=0
+    )
+
+    # Look up the newest EPC for every property in the asset list
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        full_address = home["Address"]
+
+        address1 = home["AddressLine1"]
+        postcode = home["Postcode"]
+
+        searcher = SearchEpc(
+            address1=address1,
+            postcode=postcode,
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        # Properties with no EPC are skipped here; the left merge below keeps them in the output
+        if searcher.newest_epc is None:
+            continue
+
+        # Keep the asset list address alongside the EPC so that it can be used as the merge key
+        epc = {
+            "asset_list_address": full_address,
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description"
+        ]
+    ]
+
+    # epc_df.to_csv("pfp sales data.csv", index=False)
+
+    # Left merge so that properties without an EPC still appear in the output
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["Address"],
+        right_on=["asset_list_address"]
+    )
+
+    asset_list = asset_list.drop(columns=["asset_list_address"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "EPC Property Type",
+        "built-form": "EPC Archetype",
+        "total-floor-area": "EPC Property Floor Area",
+        "construction-age-band": "EPC Property Age Band",
+        "floor-height": "EPC Property Floor Height",
+        "number-habitable-rooms": "EPC Number of Habitable Rooms",
+        "walls-description": "EPC Wall Construction",
+        "roof-description": "EPC Roof Construction",
+        "mainheat-description": "EPC Heating Type",
+        "secondheat-description": "EPC Secondary Heating",
+        "transaction-type": "Reason for last EPC"
+    })
+
+    # Only estimate the number of floors where a property type is available
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(
+            property_type=x["EPC Property Type"]
+        ) if not pd.isnull(x["EPC Property Type"]) else None, axis=1
+    )
+
+    asset_list["EPC Property Floor Area"] = asset_list["EPC Property Floor Area"].astype(float)
+    # Blank strings are swapped for None so that the column can be cast to float
+    asset_list["EPC Number of Habitable Rooms"] = np.where(
+        asset_list["EPC Number of Habitable Rooms"] == "",
+        None,
+        asset_list["EPC Number of Habitable Rooms"]
+    )
+    asset_list["EPC Number of Habitable Rooms"] = asset_list["EPC Number of Habitable Rooms"].astype(float)
+
+    # Floor area and room count are split evenly across the estimated number of floors
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["EPC Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["EPC Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+
+    # NOTE(review): the column is labelled as a perimeter but is filled by estimate_external_wall_area - confirm
+    # which quantity is intended. A falsy floor height (e.g. a blank string) falls back to 2.5; NaN is truthy
+    # and is passed through as NaN.
+    asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            floor_height=float(x["EPC Property Floor Height"]) if x["EPC Property Floor Height"] else 2.5,
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["EPC Archetype"]
+        ),
+        axis=1
+    )
+
+    # Only parse the roof description where one is available
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["EPC Roof Construction"]).process()[
+            "insulation_thickness"] if not pd.isnull(x["EPC Roof Construction"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    filename = "Places for People NORTH WEST - EPC DATA PULL.xlsx"
+    asset_list.to_excel(filename, index=False)
--- a/etl/customers/places_for_people/parity_comparison.py
+++ b/etl/customers/places_for_people/parity_comparison.py
@ -0,0 +1,164 @@
+"""
+This script is used to pull together some case studies for the Parity Projects comparison
+"""
+
+import pandas as pd
+from backend.SearchEpc import SearchEpc
+from dotenv import load_dotenv
+import os
+
+# Load environment variables from the backend .env file (path is relative to the working directory)
+load_dotenv("backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+# The measures are read from the "Total Measures" sheet of a local copy of the Parity sample workbook
+parity_measures = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Places For People/Parity Sample All Addresses and Measures.xlsx",
+    sheet_name="Total Measures"
+)
+
+# Rows whose category is SolarPV
+solar_measures = parity_measures[parity_measures["Category"] == "SolarPV"]
+
+# All rows for the single address id used as the first case study
+example_1 = parity_measures[
+    parity_measures["Address Id (used by website)"] == 6125299
+    ].copy()
+
+# NOTE(review): presumably the address details of the example_1 property - confirm against the workbook
+config = {
+    "address": "14 Victoria Road",
+    "postcode": "BD20 8SY",
+    "uprn": 100050346517
+}
+
+# Point 1:
+# Parity tends to re-score the EPCs, even if they're extremely recent.
+# For example, for '14, Victoria Road, Cross Hills, KEIGHLEY, North Yorkshire, ENGLAND, BD20 8SY'
+# the most recent EPC was done 15 May 2023 and landed at a 66D; however, for some reason, Parity re-scored this
+# home to be a 63.91. It's unclear why this is done
+
+example_1_measures = example_1[["MeasureGroupName", "Individual SAP increase"]].copy()
+# - LEDS: 0.25 SAP points
+# - 300mm of loft insulation from 200mm: 0.43 SAP points - where is this deduced from? Since the latest survey
+# indicates 250mm insulation in place
+# - Check construction of unknown party wall and fill cavity if appropriate: 0.12 SAP points (highly speculative,
+# not based on any data)
+# - Block open chimneys: 1.61 SAP points - latest survey showed 0 open fireplaces
+# - ASHP (45 degree emitters) with enhanced existing radiator central heating and hot water, from E rated gas boiler
+# 6.38 SAP points
+# - 4kWp PV array south and 30 degree pitch with no shading: 30.24 SAP points
+
+# Notes on solar - 30.34 seems like a lot
+# 400 watt is the solar panel output
+# Let's do a test for this property
+# This would be 10 solar panels
+# Using typical solar panel dimensions, this would be 19.63555m2 of roof space
+# The area of the roof is between 60 - 64.5 m2 (we use a API to get the roof data), implying only
+# around 30% of the roof is covered by solar panels
+# Using our machine learning model to simulate the impact of this on SAP, this would more likely result in
+# a
+
+from utils.s3 import read_dataframe_from_s3_parquet
+
+# Dataset of starting/ending EPC pairs used to train the SAP change model
+training_data = read_dataframe_from_s3_parquet(
+    file_key="sap_change_model/2024-06-09-10-36-53/dataset_rooms.parquet",
+    bucket_name="retrofit-data-dev",
+)
+
+# Look for properties where the only difference is solar: gather every "_ending" column other than the solar
+# ones and the columns listed here
+ignored_endings = ("sap_ending", "heat_demand_ending", "carbon_ending", "transaction_type_ending", "days_to_ending")
+ending_cols = [
+    name for name in training_data.columns
+    if "_ending" in name and "photo_supply" not in name and name not in ignored_endings
+]
+
+# Pair each ending column with its "_starting" counterpart, falling back to the bare name when there is none
+known_columns = training_data.columns
+column_pairs = {}
+for ending_name in ending_cols:
+    stem = ending_name.split("_ending")[0]
+    candidate = stem + "_starting"
+    column_pairs[ending_name] = candidate if candidate in known_columns else stem
+
+filtered = training_data.copy()
+# Take rows that had solar installs
+solar_changed = filtered["photo_supply_ending"] != filtered["photo_supply_starting"]
+filtered = filtered[solar_changed]
+# Then drop every row where anything else changed, one column pair at a time
+for ending_col, starting_col in column_pairs.items():
+    unchanged = filtered[ending_col] == filtered[starting_col]
+    filtered = filtered[unchanged]
+    print(f"ending_col: {ending_col}, filtered shape: {filtered.shape}")
+
+# Mean change for each ending photo supply value
+rdsap_by_supply = filtered.groupby("photo_supply_ending")["rdsap_change"]
+avg_change = rdsap_by_supply.mean().reset_index()
+
+# I've taken every single case of there being two EPCs for a property, where the only difference between the first
+# and second is the solar installation. This is 2692 properties, across the UK. In only 4 instances has this resulted in
+# 30 or more SAP points
+
+
+# Some functions based on the SAP methodology:
+import numpy as np
+
+
+def calculate_occupants(total_floor_area):
+    """
+    Assumed number of occupants, from Table 1b
+
+    Table 1b gives N = 1 for a total floor area of 13.9 or less; above that the occupancy grows with the
+    floor area in excess of 13.9.
+    :param total_floor_area: total floor area of the dwelling (a number or a numpy array)
+    :return: the assumed number of occupants
+    """
+    # Clamping the excess at zero makes both extra terms vanish, so small dwellings get exactly 1 occupant
+    excess_area = np.maximum(total_floor_area - 13.9, 0)
+    return 1 + (1.76 * (1 - np.exp(-0.000349 * excess_area * excess_area)) + 0.0013 * excess_area)
+
+
+def estimate_electrical_appliances(occupants, total_floor_area):
+    """
+    Annual energy use of electrical appliances, from section L2 Electrical appliances
+
+    An initial annual figure is spread over the twelve months with a cosine seasonal adjustment and the monthly
+    values are summed back up.
+    :param occupants: assumed number of occupants
+    :param total_floor_area: total floor area of the dwelling
+    :return: the sum of the twelve monthly values
+    """
+    # NOTE(review): SAP 2012 equation (L10) is believed to use an exponent of 0.4714 rather than 0.4717 -
+    # confirm against the specification
+    e_a = 207.8 * np.power(total_floor_area * occupants, 0.4717)
+
+    days_in_month = {
+        1: 31,
+        2: 28,
+        3: 31,
+        4: 30,
+        5: 31,
+        6: 30,
+        7: 31,
+        8: 31,
+        9: 30,
+        10: 31,
+        11: 30,
+        12: 31
+    }
+
+    eam = 0
+    for m, nm in days_in_month.items():
+        # Each month takes its share of the year (nm / 365), scaled by the seasonal adjustment
+        eam += e_a * (1 + 0.157 * np.cos(2 * np.pi * (m - 1.78) / 12)) * nm / 365
+
+    return eam
+
+
+def estimate_cooking(occupants):
+    """
+    From section L3 Cooking
+    :param occupants: assumed number of occupants
+    :return: 35 + 7 * occupants
+    """
+    # NOTE(review): the script below multiplies this by 365 to annualise it, i.e. treats it as a daily figure -
+    # confirm the units against section L3
+    return 35 + 7 * occupants
+
+
+# The helpers above have to be defined before they are called, otherwise the script stops with a NameError
+total_floor_area = 50
+occupants = calculate_occupants(total_floor_area)
+appliances_energy_use = estimate_electrical_appliances(occupants, total_floor_area)
+cooking_energy_use = estimate_cooking(occupants)
+
+primary_energy_per_m2 = 288  # kWh/m2 per year
+primary_energy_regulated = primary_energy_per_m2 * total_floor_area
+
+primary_energy_factor_electricity = 1.1  # Example factor
+primary_energy_appliances = appliances_energy_use * primary_energy_factor_electricity
+primary_energy_cooking = cooking_energy_use * primary_energy_factor_electricity * 365  # Annualize cooking energy
+
+# NOTE(review): primary_energy_cooking is calculated above but is not part of this total - confirm whether
+# that is intended
+total_primary_energy_use = primary_energy_regulated + primary_energy_appliances
--- a/etl/customers/places_for_people/route_march.py
+++ b/etl/customers/places_for_people/route_march.py
@ -295,6 +295,49 @@ def main():

    addresses_df2.to_excel("Places For People EPC data with surveyor.xlsx", index=False)

+    # Read in
+    df = pd.read_excel("Places For People EPC data with surveyor.xlsx")
+    df = df[
+        df["assessor_name"].isin(
+            [
+                "Arsalan Khalid", "Kieran Bradnock", "Wayne Davies", "Lindsay Sands", "Bruce Nethercot",
+                "Christopher Hearn", "Robert Sigerson", "Daniel Riddle", "Leroy Sands",
+            ]
+        )
+    ]
+
+    # Get the EPC
+    heights = []
+    for _, row in tqdm(df.iterrows(), total=len(df)):
+        searcher = SearchEpc(
+            address1=str(row["Matched EPC Address"]),
+            postcode=str(row["POSTCODE"]),
+            uprn=str(int(row["uprn"])),
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+
+        height = {
+            "uprn": row["uprn"],
+            "floor_height": searcher.newest_epc["floor-height"]
+        }
+        heights.append(height)
+
+    df = df.merge(
+        pd.DataFrame(heights),
+        how="left",
+        on="uprn"
+    )
+
+    df.to_excel("WF surveyors with floor heights.xlsx", index=False)
+

 if __name__ == "__main__":
    main()
--- a/etl/customers/stonewater/map_app/Stonewater
+++ b/etl/customers/stonewater/map_app/Stonewater
@ -0,0 +1 @@
+[{"EPC": "D", "count": 1718}, {"EPC": "C", "count": 1343}, {"EPC": "E", "count": 538}, {"EPC": "F", "count": 80}, {"EPC": "B", "count": 52}, {"EPC": "G", "count": 3}, {"EPC": "A", "count": 2}]
--- a/etl/customers/stonewater/map_app/Stonewater
+++ b/etl/customers/stonewater/map_app/Stonewater
--- a/etl/customers/stonewater/map_app/Stonewater
+++ b/etl/customers/stonewater/map_app/Stonewater
@ -0,0 +1 @@
+[{"is_real_epc": true, "count": 3736}, {"is_real_epc": false, "count": 1509}]
--- a/etl/customers/stonewater/map_app/assets/favicon.ico
+++ b/etl/customers/stonewater/map_app/assets/favicon.ico
--- a/etl/customers/stonewater/map_app/assets/hestia-logo.png
+++ b/etl/customers/stonewater/map_app/assets/hestia-logo.png
--- a/etl/customers/stonewater/map_app/assets/osmosis-Logo.svg
+++ b/etl/customers/stonewater/map_app/assets/osmosis-Logo.svg
--- a/etl/customers/stonewater/map_app/assets/stonewater-logo.png
+++ b/etl/customers/stonewater/map_app/assets/stonewater-logo.png
--- a/etl/customers/stonewater/map_app/callbacks.py
+++ b/etl/customers/stonewater/map_app/callbacks.py
--- a/etl/customers/stonewater/map_app/config.py
+++ b/etl/customers/stonewater/map_app/config.py
@ -0,0 +1,8 @@
+import os
+import json
+import dotenv
+
+# When running locally, we'll need to load the .env file
+dotenv.load_dotenv()
+
+MAPBOX_ACCESS_TOKEN = os.getenv("MAPBOX_ACCESS_TOKEN")
--- a/etl/customers/stonewater/map_app/map_page.py
+++ b/etl/customers/stonewater/map_app/map_page.py
@ -0,0 +1,230 @@
+import dash_bootstrap_components as dbc
+from dash import html, dcc
+import json
+import plotly.graph_objects as go
+import pandas as pd
+
+from config import MAPBOX_ACCESS_TOKEN
+
+
+def make_real_epc_piechart(real_epc_breakdown):
+    """
+    Build the pie chart showing how many properties have a real EPC.
+
+    :param real_epc_breakdown: list of dicts, each with an "is_real_epc" and a "count" key
+    :return: a dcc.Graph wrapping the pie chart, with the mode bar hidden
+    """
+    # Colours are applied by position, i.e. in the order the entries appear in real_epc_breakdown
+    pie = go.Pie(
+        labels=[entry["is_real_epc"] for entry in real_epc_breakdown],
+        values=[entry["count"] for entry in real_epc_breakdown],
+        marker_colors=["#027fa6", "rgb(225 225 225)"],
+    )
+
+    figure = go.Figure(data=[pie])
+    figure.update_layout(margin={"t": 0})
+
+    return dcc.Graph(figure=figure, config={"displayModeBar": False})
+
+
+def make_epc_rating_piechart(epc_rating_breakdown):
+    """
+    Build the pie chart showing the breakdown of EPC ratings.
+
+    :param epc_rating_breakdown: list of dicts, each with an "EPC" (rating letter) and a "count" key
+    :return: a dcc.Graph wrapping the pie chart, with the mode bar hidden
+    """
+    # Colour for each rating, from green (A) to red (G). The colours are keyed by rating rather than
+    # applied by position, so a rating that is absent from the data cannot shift the remaining
+    # colours onto the wrong slices.
+    rating_colors = {
+        "A": "#117d58",
+        "B": "#2da55c",
+        "C": "#8dbd40",
+        "D": "#f7cd14",
+        "E": "#f3a96a",
+        "F": "#ef8026",
+        "G": "#e41e3b",
+    }
+    # Used for any rating value that is not one of A-G
+    fallback_color = "#cccccc"
+
+    # Re-order alphabetically, i.e. from A to G
+    epc_rating_breakdown = sorted(epc_rating_breakdown, key=lambda x: x["EPC"])
+
+    labels = [x["EPC"] for x in epc_rating_breakdown]
+    values = [x["count"] for x in epc_rating_breakdown]
+    marker_colors = [rating_colors.get(label, fallback_color) for label in labels]
+
+    fig = go.Figure(
+        # sort=False keeps the slices in the A to G order built above
+        data=[go.Pie(labels=labels, values=values, marker_colors=marker_colors, sort=False)],
+    )
+
+    fig.update_layout(margin={"t": 0})
+
+    plot = dcc.Graph(figure=fig, config={"displayModeBar": False})
+
+    return plot
+
+
+def make_map(locations):
+    """
+    Build the map of property locations.
+
+    :param locations: list of dicts with the keys "uprn", "standardised_address",
+        "standardised_postcode", "LATITUDE" and "LONGITUDE"
+    :return: a dcc.Graph wrapping the map, or None when there are no locations
+    """
+    if not locations:
+        return None
+
+    df = pd.DataFrame(locations)
+
+    def _hover_text(row):
+        # Text shown when hovering over a marker
+        return (
+            f"UPRN: {int(row['uprn'])}<br>Address: {row['standardised_address']}<br>Postcode: "
+            f"{row['standardised_postcode']}<br>Latitude: {row['LATITUDE']}<br>Longitude: {row['LONGITUDE']}"
+        )
+
+    df['hover_text'] = df.apply(_hover_text, axis=1)
+
+    markers = go.Scattermapbox(
+        lat=df["LATITUDE"].tolist(),
+        lon=df["LONGITUDE"].tolist(),
+        mode="markers",
+        marker=go.scattermapbox.Marker(size=10, color="#027fa6"),
+        text=df["hover_text"],
+        hoverinfo='text'
+    )
+
+    mapbox = go.layout.Mapbox(
+        accesstoken=MAPBOX_ACCESS_TOKEN,
+        bearing=0,
+        center=go.layout.mapbox.Center(lat=53, lon=-1.5),
+        pitch=0,
+        zoom=5,
+    )
+
+    figure = go.Figure(
+        data=[markers],
+        layout=go.Layout(
+            autosize=True,
+            hovermode="closest",
+            mapbox=mapbox,
+            margin={"t": 0},
+        ),
+    )
+
+    return dcc.Graph(figure=figure, config={"displayModeBar": False})
+
+
+def layout():
+    """
+    Build the single page of the Stonewater survey map app.
+
+    Three JSON files are read using relative paths, so they are resolved against the working
+    directory of the running process. The page is made up of a banner with logos, a title and
+    introduction, the map of properties, and two pie charts.
+
+    :return: a dbc.Container holding the whole page
+    """
+    # Get the data
+    with open("Stonewater Mapping Data.json", "r") as file:
+        locations = json.load(file)
+
+    # Get the EPC breakdown data
+    with open("Stonewater real EPC breakdown.json") as file:
+        real_epc_breakdown = json.load(file)
+
+    # Get the EPC ratings data
+    with open("Stonewater EPC rating breakdown.json") as file:
+        epc_rating_breakdown = json.load(file)
+
+    page = dbc.Container(
+        [
+            # Header: logo banner, "Powered by" strip, title and introduction
+            dbc.Row(
+                dbc.Col(
+                    html.Div(
+                        [
+                            # Banner with logos
+                            dbc.Row(
+                                [
+                                    dbc.Col(
+                                        html.Img(src="assets/stonewater-logo.png", height="50px"),
+                                        width="auto"
+                                    ),
+                                    dbc.Col(
+                                        html.Img(src="assets/osmosis-Logo.svg", height="50px"),
+                                        width="auto"
+                                    ),
+                                    # NOTE(review): this Div has a style but no children - presumably a
+                                    # placeholder for banner text; confirm
+                                    dbc.Col(
+                                        html.Div(
+                                            style={"color": "white", "font-size": "1.5rem", "font-weight": "bold"}
+                                        ),
+                                        width=True,
+                                        className="text-center"
+                                    )
+                                ],
+                                className="align-items-center",
+                                style={"background-color": "#027fa6", "padding": "10px"}
+                            ),
+                            dbc.Row(
+                                [
+                                    dbc.Col("Powered by", style={"color": "#027fa6", "fontSize": "1rem", 'zIndex': 10},
+                                            width="auto"),
+                                    dbc.Col(
+                                        html.A(
+                                            html.Img(src="assets/hestia-logo.png", height="50px"),
+                                            href="https://hestia.homes",
+                                        ),
+                                        width="auto",
+                                        style={"margin-left": "-60px"}
+                                    ),
+                                ],
+                                justify='left',
+                                align="center"
+                            ),
+                            html.H1(
+                                "Stonewater Survey Map",
+                                style={"font-size": "2.5rem", "font-weight": "bold", "margin-bottom": "20px"}
+                            ),
+                            html.P(
+                                "This map shows the location of the properties that are to be surveyed by Osmosis.",
+                                style={"font-size": "1.25rem", "margin-bottom": "40px"}
+                            ),
+                        ],
+                        className="text-center"
+                    ),
+                    width=12
+                ),
+                className="mt-5"
+            ),
+            # The map of property locations
+            dbc.Row(
+                dbc.Col(
+                    make_map(locations=locations),
+                    width=10,
+                    align="center",
+                    className="text-center"
+                ),
+                justify="center"
+            ),
+            # The two pie charts, side by side
+            dbc.Row(
+                [
+                    dbc.Col(
+                        [
+                            html.Div(
+                                "Breakdown of real EPCs",
+                                style={"fontSize": "1.5rem", "fontWeight": "bold", "marginBottom": "1em"},
+                                className='text-center'
+                            ),
+                            # NOTE(review): the 3736 and 5245 figures below are hard-coded in the text and
+                            # are not taken from real_epc_breakdown, so they need updating by hand if the
+                            # JSON data changes
+                            html.Div(
+                                "This pie chart shows the proportion of real EPCs in the asset list. Currently, "
+                                "there are EPCs for 3736 of the 5245 properties that have a UPRN in the asset list",
+                                style={"marginBottom": "1em"}
+                            ),
+                            make_real_epc_piechart(real_epc_breakdown),
+                        ],
+                        width={"size": 5},
+                    ),
+                    dbc.Col(
+                        [
+                            html.Div(
+                                "EPC Ratings for properties with an EPC",
+                                style={"fontSize": "1.5rem", "fontWeight": "bold", "marginBottom": "1em"},
+                                className='text-center'
+                            ),
+                            # The two links below point at individual certificates on the government
+                            # EPC register and are also hard-coded
+                            html.Div(
+                                [
+                                    "This pie chart shows the breakdown of EPC ratings, for properties that currently "
+                                    "have an EPC. "
+                                    "The ratings range from A to G, where surprisingly, there are two EPC properties "
+                                    "that were initially "
+                                    "expected by Parity's modelled SAP, to be EPC D or below. These properties can be"
+                                    " seen ",
+                                    html.A("here",
+                                           href="https://find-energy-certificate.service.gov.uk/energy-certificate"
+                                                "/2708-5001-7327-6090-7284",
+                                           target="_blank"),
+                                    " and ",
+                                    html.A("here",
+                                           href="https://find-energy-certificate.service.gov.uk/energy-certificate"
+                                                "/1037-4032-1009-0361-7292",
+                                           target="_blank"),
+                                    "."
+                                ],
+                                style={"marginBottom": "1em"}
+                            ),
+                            make_epc_rating_piechart(epc_rating_breakdown),
+                        ],
+
+                        width={"size": 5},
+                    ),
+                ],
+                justify="center"
+            )
+        ],
+        fluid=True,
+        className="p-5"
+    )
+
+    return page
--- a/etl/customers/stonewater/map_app/requirements.txt
+++ b/etl/customers/stonewater/map_app/requirements.txt
@ -0,0 +1,12 @@
+dash==2.8.1
+gunicorn
+pandas
+dash-bootstrap-components==1.3.1
+boto3
+dropbox
+Flask-Caching
+dash-extensions
+mysql-connector-python
+sqlalchemy
+werkzeug==2.3.7
+python-dotenv
--- a/etl/customers/stonewater/map_app/server.py
+++ b/etl/customers/stonewater/map_app/server.py
@ -0,0 +1,46 @@
+import logging
+import secrets
+
+import dash_bootstrap_components as dbc
+from dash import html
+from dash_extensions.enrich import DashProxy, MultiplexerTransform
+import flask
+from map_page import layout
+
+logger = logging.getLogger(__name__)
+
+# We just use a simple secret key for the moment
+
+SECRET_KEY = secrets.token_hex(24)
+
+
+def init_app():
+    """
+    Create and configure the Dash application for the Stonewater survey map.
+
+    :return: the configured DashProxy app, with its title, layout and favicon set
+    """
+    app = DashProxy(
+        __name__,
+        server=flask.Flask(__name__),
+        suppress_callback_exceptions=True,
+        external_stylesheets=[
+            dbc.themes.BOOTSTRAP,
+            dbc.icons.FONT_AWESOME,
+            "https://fonts.googleapis.com/css?family=Comfortaa",
+        ],
+        transforms=[MultiplexerTransform()]
+    )
+
+    server = app.server
+
+    # Set app config
+    server.config.update(
+        SECRET_KEY=SECRET_KEY,
+    )
+
+    app.title = "Hestia X Stonewater"
+
+    # Define the layout. layout() is called once here, so the page is built when the app is created
+    app.layout = layout()
+    # The icon shipped with the app is assets/favicon.ico
+    app._favicon = "favicon.ico"
+
+    return app
+
+
+app = init_app()
--- a/etl/customers/stonewater/map_app/wsgi.py
+++ b/etl/customers/stonewater/map_app/wsgi.py
@ -0,0 +1,8 @@
+# Entry point for the Stonewater map app.
+# Callbacks must be imported to run the app
+import callbacks  # NOQA
+from server import app
+
+# The Flask server behind the Dash app, exposed under the name `application`
+# (presumably for a WSGI server such as gunicorn, which is listed in requirements.txt - TODO confirm)
+application = app.server
+
+if __name__ == "__main__":
+    # Only reached when this file is run directly.
+    # NOTE(review): debug=True combined with host="0.0.0.0" serves the app in debug mode on every
+    # network interface - presumably intended for local development only; confirm before deploying
+    app.run_server(port=8080, debug=True, host="0.0.0.0")
--- a/etl/customers/stonewater/no_matches.py
+++ b/etl/customers/stonewater/no_matches.py
@ -0,0 +1,165 @@
+no_matches = [
+    {
+        'internal_id': 4626, 'full_address': '1 Dean Lane, Sixpenny Handley, Salisbury, SP5 5AS', 'postcode': 'SP5 5AS',
+        'Note': 'No match found - all addresses in this postcode are for Mulberry Court, Sixpenny Handley, Salisbury, '
+                'SP5 5AS, addresses not recognised by Zoopla - possibly the postcode is incorrect and this could be'
+                ' Handley Enterprises Ltd, Unit 1 Dean Lane, Sixpenny Handley, Salisbury, SP5 5PA.'
+                ' Or this could be 1 Mulberry Court Sixpenny Handley, Salisbury SP5 5AS'
+    },
+    {
+        'internal_id': 4627, 'full_address': '3 Dean Lane, Sixpenny Handley, Salisbury, SP5 5AS', 'postcode': 'SP5 5AS',
+        'Note': 'No match found - all addresses in this postcode are for Mulberry Court, Sixpenny Handley, Salisbury, '
+                'SP5 5AS, addresses not recognised by Zoopla - possibly the postcode is incorrect and this could be'
+                ' 2 Town Farm House, Dean Lane, Sixpenny Handley, Salisbury, SP5 5PA.'
+                ' Or this could be 3 Mulberry Court Sixpenny Handley, Salisbury SP5 5AS'
+    },
+    {
+        'internal_id': 4628, 'full_address': '5 Dean Lane, Sixpenny Handley, Salisbury, SP5 5AS', 'postcode': 'SP5 5AS',
+        'Note': 'No match found - all addresses in this postcode are for Mulberry Court, Sixpenny Handley, Salisbury, '
+                'SP5 5AS, addresses not recognised by Zoopla - possibly the postcode is incorrect and this could be'
+                ' 4 Town Farm House, Dean Lane, Sixpenny Handley, Salisbury, SP5 5PA.'
+                ' Or this could be 5 Mulberry Court Sixpenny Handley, Salisbury SP5 5AS'
+    },
+    {
+        'internal_id': 544, 'full_address': 'Room 1, Sawr, PO Box 1354, Bedford, MK41 5AB', 'postcode': 'MK41 5AB',
+        "Note": "Postcode deleted in April 2024: https://checkmypostcode.uk/mk415ab"
+    },
+    {
+        'internal_id': 5116, 'full_address': '3 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 3 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988896'
+    },
+    {
+        'internal_id': 5114, 'full_address': '4 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 4 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988897'
+    },
+    {
+        'internal_id': 5115, 'full_address': '2 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 2 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988895'
+    },
+    {
+        'internal_id': 5113, 'full_address': '6 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 6 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988899'
+    },
+    {
+        'internal_id': 5112, 'full_address': '1 Huntspond Road, Titchfield, Fareham, PO14 4SS', 'postcode': 'PO14 4SS',
+        'Note': 'Is this 1 St Francis Court, 195 Hunts Pond Road, Fareham, PO14 4SS, uprn: 100061988894'
+    },
+    {
+        'internal_id': 3846, 'full_address': '2 Beaufort Road, Southbourne, Bournemouth, BH6 5BD',
+        'postcode': 'BH6 5BD',
+        'Note': "2 Beaufort Road, Southbourne, Bournemouth is listed under the postcode BH6 5AL - is there a typo in "
+                "the postcode?"
+    },
+    {
+        'internal_id': 4497, 'full_address': '11 Brokenford Lane, Totton, Southampton, SO40 9LZ',
+        'postcode': 'SO40 9LZ',
+        'Note': "This postcode doesn't appear to exist, closest is 10 brokenford lane, Totton, Southampton, SO40 9DW."
+                " What should this be?"
+    },
+    {
+        'internal_id': 4181, 'full_address': '25a Eastcott Road, Old Town, Swindon, SN1 3PA', 'postcode': 'SN1 3PA',
+        'Note': 'All addresses at this postcode are for Bow Court. '
+                'Closest match is 25 Eastcott Road, Swindon, SN1 3LT, but there is no 25A'
+    },
+    {
+        'internal_id': 5447, 'full_address': '3 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "There is no 'Send Road' at this postcode. There are a few possible matches, e.g. Flat 3, "
+                "1 Send Road, RG4 8EH"
+    },
+    {
+        'internal_id': 5449, 'full_address': '5 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 5450, 'full_address': '6 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 5446, 'full_address': '1 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 5448, 'full_address': '4 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 5451, 'full_address': '7 Send Road, Send Road, Reading, RG4 8EP', 'postcode': 'RG4 8EP',
+        "Note": "Same as for 3 Send Road"
+    },
+    {
+        'internal_id': 4547, 'full_address': '2 Cecil Terrace, Bemerton, Salisbury, SP2 9NE', 'postcode': 'SP2 9NE',
+        "Note": "Addresses for this postcode are for The Croft, SP2 9NE. Should this be 2 Cecil Terrace SP2 9ND, with"
+                " uprn: 100121039798 ?"
+    },
+    {
+        'internal_id': 4549, 'full_address': '4 Cecil Terrace, Bemerton, Salisbury, SP2 9NE', 'postcode': 'SP2 9NE',
+        "Note": "Addresses for this postcode are for The Croft, SP2 9NE. Should this be 4 Cecil Terrace SP2 9ND?"
+    },
+    {
+        'internal_id': 3601, 'full_address': '20 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should this be 20 Constitution Hill Gardens, Poole, BH14 0PY? (i.e. postcode is wrong) "
+                "uprn: 10001086693"
+    },
+    {
+        'internal_id': 3592, 'full_address': '7 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"
+    },
+    {
+        'internal_id': 3594, 'full_address': '9 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"
+    },
+    {
+        'internal_id': 3591, 'full_address': '6 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"
+    },
+    {
+        'internal_id': 3593, 'full_address': '8 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"},
+    {
+        'internal_id': 3590, 'full_address': '5 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"},
+    {
+        'internal_id': 3589, 'full_address': '3 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+        "Note": "Should the postcode be BH14 0PY ?"},
+    {
+        'internal_id': 3600, 'full_address': '18 Constitution Hill, Parkstone, Poole, BH14 0PX',
+        'postcode': 'BH14 0PX', "Note": "Should the postcode be BH14 0PY ?"},
+    {
+        'internal_id': 3599, 'full_address': '17 Constitution Hill, Parkstone, Poole, BH14 0PX',
+        'postcode': 'BH14 0PX', "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3598, 'full_address': '15 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3608, 'full_address': '26 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3610, 'full_address': '30 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3603, 'full_address': '22 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3612, 'full_address': '32 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3595, 'full_address': '10 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+    {'internal_id': 3613, 'full_address': '34 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0PY ?"},
+
+    {'internal_id': 3597, 'full_address': '12 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3602, 'full_address': '21 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3606, 'full_address': '19 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3604, 'full_address': '23 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3605, 'full_address': '25 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3609, 'full_address': '29 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3596, 'full_address': '11 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3607, 'full_address': '27 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 3611, 'full_address': '31 Constitution Hill, Parkstone, Poole, BH14 0PX', 'postcode': 'BH14 0PX',
+     "Note": "Should the postcode be BH14 0QB ?"},
+    {'internal_id': 5622, 'full_address': '26 Roman Way, Andover, SP10 5HZ', 'postcode': 'SP10 5HZ',
+     'Note': 'Should this postcode be SP10 5JU ?'}
+]
--- a/etl/customers/stonewater/outputs
+++ b/etl/customers/stonewater/outputs
@ -0,0 +1,132 @@
+"""
+This script prepares some outputs for the stonewater project, 27th June 2024
+
+The work done so far has been data cleaning and clustering.
+In this script, we do the following things:
+
+1) Match the clustering data to the archetypes
+2) Do some basic analysis on the data
+3) Mapping of the archetypes
+"""
+import pandas as pd
+import json
+from utils.s3 import read_pickle_from_s3
+
+stonewater_asset_list = pd.read_csv("Stonewater asset list with archetypes V2.csv")
+archetyped_asset_list = stonewater_asset_list[
+    [
+        "internal_id", "customer_asset_id", "external_address_id", "udprn", "uprn", "cluster",
+        "archetype_representative", "rank"
+    ]
+].copy()
+archetyped_asset_list = archetyped_asset_list[archetyped_asset_list["rank"] != "NO ARCHETYPE"]
+archetyped_asset_list["rank"] = archetyped_asset_list["rank"].astype(int)
+# Sort
+archetyped_asset_list = archetyped_asset_list.sort_values(by=["cluster", "rank"])
+
+# Read in and merge on clustering features
+clustering_features = read_pickle_from_s3(
+    bucket_name="retrofit-data-dev",
+    s3_file_name="customers/Stonewater/clustering/clustering_dataframe.pkl"
+)
+
+# Move property-type and built-form to the first two columns
+columns_to_move = ['property-type', 'built-form']
+
+# Get the remaining columns
+remaining_columns = [col for col in clustering_features.columns if col not in columns_to_move]
+
+# Create the new column order
+new_column_order = columns_to_move + remaining_columns
+
+# Reorder the DataFrame
+clustering_features = clustering_features[new_column_order]
+
+archetyped_asset_list = archetyped_asset_list.merge(
+    clustering_features,
+    on="internal_id",
+    how="inner"
+)
+
+archetyped_asset_list = archetyped_asset_list.rename(
+    columns={
+        "internal_id": "Osm. ID",
+        "customer_asset_id": "Org. ref.",
+        "external_address_id": "Address ID",
+        "cluster": "Archetype ID",
+        "archetype_representative": "Archetype Representative",
+        "rank": "Archetype Group Rank",
+    }
+)
+archetyped_asset_list["uprn"] = archetyped_asset_list["uprn"].astype('Int64')
+# Create an extract of the features
+
+
+# Look at number of combinations
+# - If we look at the number of combinations of property type & built form, we have 25 unique combinations
+# - If we look at the number of combinations of property type, built form, and walls description, this jumps
+# massively to 237 unique combinations
+# - Adding roof description to the mix, we have 857 unique combinations
+# - Adding floor description, we have 1278 unique combinations
+# This doesn't even begin to consider the other variables that we have in the dataset, such as the property dimensions,
+# location, and other factors.
+# Ideally, we would perfectly separate these variables but this is not possible, given the constraint of needing ~450
+# archetypes. We will need to make some compromises here. This is where a clustering algorithm can help us.
+# We don't end up with perfect separation but we can get a good enough separation to make the archetypes useful, and can
+# base the archetypes on a number of energy performance metrics, as well as location and other factors.
+# archetyped_asset_list[
+#     ["property-type", "built-form", "walls-description", "roof-description",
+#      "floor-description"]].drop_duplicates().shape
+
+# Save this as an excel
+# archetyped_asset_list.to_excel("Stonewater Archetyping Features.xlsx", index=False)
+
+# We store the location data, which will be used for the mapping. We just need the longitude and latitude
+mapping_data = stonewater_asset_list[
+    stonewater_asset_list["archetype_representative"]
+][["internal_id", "uprn", "standardised_address", "standardised_postcode"]]
+
+mapping_data = mapping_data.merge(
+    clustering_features[["internal_id", "LONGITUDE", "LATITUDE"]],
+)
+mapping_data = mapping_data.drop(columns=["internal_id"])
+
+with open("etl/customers/stonewater/map_app/Stonewater Mapping Data.json", "w") as f:
+    f.write(json.dumps(mapping_data.to_dict(orient="records")))
+
+# We also include some data for visualising the breakdown of EPCS.
+# The column names are set explicitly with rename_axis / reset_index(name=...), because the names
+# produced by value_counts().to_frame().reset_index() differ between pandas 1.x and pandas 2.x, and the
+# map app reads the "is_real_epc", "EPC" and "count" keys from the JSON written below.
+proportion_of_real_epcs = (
+    clustering_features["estimated"]
+    .value_counts()
+    .rename_axis("estimated")
+    .reset_index(name="count")
+)
+# Invert the true and false, so the flag says whether the EPC is real rather than estimated
+proportion_of_real_epcs["estimated"] = ~proportion_of_real_epcs["estimated"].astype(bool)
+proportion_of_real_epcs = proportion_of_real_epcs.rename(
+    columns={"estimated": "is_real_epc"}
+)
+
+with open("etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json", "w") as f:
+    json.dump(proportion_of_real_epcs.to_dict(orient="records"), f)
+
+# Produce the breakdown of EPC ratings, for the properties that have a real (not estimated) EPC
+epc_rating_breakdown = (
+    clustering_features[~clustering_features["estimated"]]["current-energy-rating"]
+    .value_counts()
+    .rename_axis("EPC")
+    .reset_index(name="count")
+)
+
+with open("etl/customers/stonewater/map_app/Stonewater EPC rating breakdown.json", "w") as f:
+    json.dump(epc_rating_breakdown.to_dict(orient="records"), f)
+
+epc_a_properties = clustering_features[
+    (clustering_features["current-energy-rating"] == "A")
+    & (~clustering_features["estimated"])
+    ]
+
+epc_a_properties = epc_a_properties.merge(
+    stonewater_asset_list,
+    on="internal_id",
+    how="inner"
+)
--- a/etl/customers/stonewater/shdf_3_clustering.py
+++ b/etl/customers/stonewater/shdf_3_clustering.py
--- a/etl/customers/unitas/20_may_2024_data_pull.py
+++ b/etl/customers/unitas/20_may_2024_data_pull.py
@ -0,0 +1,148 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    Pull EPC data for a list of Unitas properties and store it, with some derived estimates, as an
+    Excel file.
+
+    The asset list is read from a local Excel file. For each row the newest EPC is looked up with
+    SearchEpc, a subset of the EPC fields is merged back onto the asset list, and estimates of the
+    number of floors, perimeter, external wall area and roof insulation thickness are added.
+    :return: None, the result is written to an Excel file in the working directory
+    """
+    # asset_list = read_excel_from_s3(
+    #     bucket_name="retrofit-datalake-dev",
+    #     file_key="customers/guiness/TGP CW Properties PV.xlsx",
+    #     header_row=0
+    # )
+    # NOTE(review): this is a hard-coded path on a developer machine, so the script only runs there as-is
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/UNITAS BUNGALOWS - EPC DATA PULL.xlsx", header=0
+    )
+
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        # NOTE(review): "Property Reference" is passed as the uprn - presumably this column holds
+        # UPRNs; confirm against the asset list
+        searcher = SearchEpc(
+            address1=str(home["Address Line 1"]),
+            postcode=home["Post Code"],
+            uprn=home["Property Reference"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        # No EPC found: nothing is recorded, so the left merge below leaves this row's EPC columns empty
+        if searcher.newest_epc is None:
+            continue
+
+        # Keep the asset list address and postcode alongside the EPC fields, for merging back later
+        epc = {
+            "asset_list_address": home["Address Line 1"],
+            "asset_list_postcode": home["Post Code"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need
+    # (this selection raises a KeyError if no EPCs were found at all, as epc_df then has no columns)
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description"
+        ]
+    ]
+
+    # NOTE(review): the join is on the address line only ("asset_list_postcode" was dropped above), so
+    # two assets sharing the same "Address Line 1" would each pick up both EPC rows - confirm that
+    # addresses are unique in the asset list
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["Address Line 1"],
+        right_on=["asset_list_address"]
+    )
+
+    asset_list = asset_list.drop(columns=["asset_list_address"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "EPC Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC"
+    })
+
+    # Rows without an EPC property type get no estimate
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(property_type=x["EPC Property Type"]) if not pd.isnull(
+            x["EPC Property Type"]) else None,
+        axis=1
+    )
+
+    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
+    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)
+
+    # The estimates below are only computed for rows that have a uprn from the EPC merge.
+    # Floor area and room count are divided by the number of floors to get per-floor figures.
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ) if not pd.isnull(x["uprn"]) else None, axis=1
+    )
+
+    # NOTE(review): this column is labelled as a perimeter in metres but is filled from
+    # estimate_external_wall_area - confirm the intended name and units.
+    # A floor height of 2.5 is used when the EPC floor height is empty.
+    asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ) if not pd.isnull(x["uprn"]) else None,
+        axis=1
+    )
+
+    # Take the insulation thickness from the processed EPC roof description
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
+            x["uprn"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    # NOTE(review): "30tg" in the filename looks like a typo for "30th"
+    filename = "UNITAS BUNGALOWS - EPC DATA PULL - May 30tg 2024.xlsx"
+    asset_list.to_excel(filename, index=False)
--- a/etl/customers/unitas/Audit_check.py
+++ b/etl/customers/unitas/Audit_check.py
@ -0,0 +1,215 @
+import pandas as pd
+import os
+
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def _lookup_epcs(searcher):
+    """
+    Run the EPC search on the given SearchEpc instance, skipping the Ordnance Survey lookup
+    :param searcher: A configured SearchEpc instance
+    :return: The newest EPC followed by the older EPCs, or an empty list when no EPC was found
+    """
+    # Force the skipping of estimating the EPC
+    searcher.ordnance_survey_client.property_type = None
+    searcher.ordnance_survey_client.built_form = None
+
+    searcher.find_property(skip_os=True)
+    if searcher.newest_epc is None:
+        return []
+
+    return [searcher.newest_epc] + searcher.older_epcs
+
+
+def _newest_eco_epc(epcs):
+    """
+    Pick the most recently inspected EPC whose transaction type is "ECO assessment"
+    :param epcs: List of EPC records (dicts with 'transaction-type' and 'inspection-date' keys)
+    :return: The matching EPC record, or None when there is no ECO assessment EPC
+    """
+    eco_epcs = [x for x in epcs if x['transaction-type'] == 'ECO assessment']
+    if not eco_epcs:
+        return None
+
+    return max(eco_epcs, key=lambda x: x['inspection-date'])
+
+
+def app():
+    """
+    Audit the floor heights recorded on EPCs for Unitas properties, in three ad-hoc steps:
+    1. For the SCIS rows of the rolling master sheet, look up the EPCs by UPRN and store the floor height of the
+       newest EPC and of the newest ECO assessment EPC as CSVs
+    2. For the community housing sheet, look up the EPCs by address and collect the largest floor height on
+       record, the newest ECO assessment EPC and any EPCs with a current energy efficiency of 54 or 69
+    3. Print the mean floor height of social housing ECO assessment EPCs in Birmingham for comparison
+
+    All inputs are read from hard-coded local paths.
+    :return:
+    """
+    # Read in rolling master
+    master = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Downloads/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 16.5.24 K - PASSWORD "
+        "PROTECTED/ECO 4 - PHASE 1-Table 1.csv"
+    )
+
+    master = master[master["INSTALLER"] == "SCIS"]
+
+    master = master[
+        [
+            'UPRN', 'NO.', 'Street / Block Name', 'Town/Area', 'Post Code', 'Surveyor', "SUBMISSION DATE"
+        ]
+    ]
+
+    master = master[~pd.isnull(master["UPRN"])]
+    master = master[master["UPRN"] != "NOT ON ASSET LIST"]
+
+    heights = []
+    eco_assessment_epcs = []
+    for _, row in tqdm(master.iterrows(), total=len(master)):
+        searcher = SearchEpc(
+            address1="",
+            postcode="",
+            uprn=str(int(row["UPRN"])),
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=False,
+        )
+        all_epcs = _lookup_epcs(searcher)
+        if not all_epcs:
+            continue
+
+        # Look for eco assessment epcs and take the newest
+        eco_epc = _newest_eco_epc(all_epcs)
+        if eco_epc is not None:
+            eco_assessment_epcs.append(eco_epc)
+
+        height = {
+            "uprn": row["UPRN"],
+            "floor_height": searcher.newest_epc["floor-height"]
+        }
+        heights.append(height)
+
+    heights_df = pd.DataFrame(heights)
+
+    eco_assessment_epcs_df = pd.DataFrame(eco_assessment_epcs)
+
+    merged_heights_df = master.merge(heights_df, left_on="UPRN", right_on="uprn", how="inner")
+    # Drop the properties whose newest EPC has a blank floor height
+    merged_heights_df = merged_heights_df[merged_heights_df["floor_height"] != ""]
+    merged_eco_assessment_epcs_df = master.merge(eco_assessment_epcs_df[["uprn", "floor-height"]], left_on="UPRN",
+                                                 right_on="uprn", how="inner")
+    # Floor heights can be blank strings, which astype(float) cannot convert - coerce those to NaN instead
+    merged_eco_assessment_epcs_df["floor-height"] = pd.to_numeric(
+        merged_eco_assessment_epcs_df["floor-height"], errors="coerce"
+    )
+
+    print(merged_eco_assessment_epcs_df.groupby("Surveyor")["floor-height"].mean())
+
+    # Store
+    merged_heights_df.to_csv("Unitas 2022 heights - based on newest EPC.csv", index=False)
+    merged_eco_assessment_epcs_df.to_csv("Unitas 2022 heights - based on ECO assessment EPC.csv", index=False)
+
+    # Read in a different sheet
+    master = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/COMMUNITY HOUSING SURVEYS WITH A POST EPC.xlsx"
+    )
+
+    master["row_number"] = master.index
+
+    eco_assessment_epcs = []
+    expected_pre = []
+    expected_post = []
+    biggest_floor_height = []
+    for _, row in tqdm(master.iterrows(), total=len(master)):
+
+        # Cast every part to a string so that numeric or missing cells do not break the join
+        full_address = ", ".join(
+            str(row[column]) for column in ["NO.", "Street / Block Name", "Town/Area", "Post Code"]
+        )
+        searcher = SearchEpc(
+            address1=str(row["NO."]),
+            postcode=str(row["Post Code"]),
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=False,
+            full_address=full_address
+        )
+
+        all_epcs = _lookup_epcs(searcher)
+        if not all_epcs:
+            continue
+
+        # Search for SAP 54s
+        sap_54s = [x for x in all_epcs if x["current-energy-efficiency"] == "54"]
+        sap_69s = [x for x in all_epcs if x["current-energy-efficiency"] == "69"]
+        epc_heights = [float(x["floor-height"]) for x in all_epcs if x["floor-height"] != ""]
+
+        # Look for eco assessment epcs and take the newest
+        eco_epc = _newest_eco_epc(all_epcs)
+        if eco_epc is not None:
+            eco_assessment_epcs.append(
+                {
+                    "row_number": row["row_number"],
+                    **eco_epc
+                }
+            )
+
+        if epc_heights:
+            biggest_floor_height.append(
+                {
+                    "row_number": row["row_number"],
+                    "floor_height": max(epc_heights)
+                }
+            )
+
+        if sap_54s:
+            expected_pre.append(
+                {
+                    "row_number": row["row_number"],
+                    **sap_54s[0]
+                }
+            )
+
+        if sap_69s:
+            expected_post.append(
+                {
+                    "row_number": row["row_number"],
+                    **sap_69s[0]
+                }
+            )
+
+    # NOTE: the SAP 54/69 frames are built but not stored or summarised below
+    expected_pre_df = pd.DataFrame(expected_pre)
+    expected_post_df = pd.DataFrame(expected_post)
+
+    heights_df = pd.DataFrame(biggest_floor_height)
+    eco_assessment_epcs_df = pd.DataFrame(eco_assessment_epcs)
+
+    # floor_height is already a float here as blank heights are skipped in the loop above
+    merged_heights_df = master.merge(heights_df, on="row_number", how="inner")
+
+    merged_eco_assessment_epcs_df = master.merge(
+        eco_assessment_epcs_df[["row_number", "floor-height"]], on="row_number", how="inner"
+    )
+    merged_eco_assessment_epcs_df["floor-height"] = pd.to_numeric(
+        merged_eco_assessment_epcs_df["floor-height"], errors="coerce"
+    )
+
+    print(merged_eco_assessment_epcs_df.groupby("Surveyor")["floor-height"].mean())
+
+    # Check average floor height for social housing properties with ECO assessment EPCs in Birmingham
+    sample = pd.read_csv("local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv")
+    sample = sample[sample["TRANSACTION_TYPE"] == "ECO assessment"]
+    sample = sample[sample["TENURE"].isin(["rental (social)", "Rented (social)"])]
+    sample["FLOOR_HEIGHT"] = sample["FLOOR_HEIGHT"].astype(float)
+    print(sample["FLOOR_HEIGHT"].mean())
+
+    print(sample[pd.to_datetime(sample["LODGEMENT_DATE"]) >= "2022-01-01"]["FLOOR_HEIGHT"].mean())
--- a/etl/customers/vander_elliot/non_intrusives.py
+++ b/etl/customers/vander_elliot/non_intrusives.py
@ -0,0 +1,129 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+from datetime import datetime
+from etl.non_intrusive_surveys.upload.UploadNonIntrusives import UploadNonIntrusives
+
+PORTFOLIO_ID = 82
+USER_ID = 8
+
+already_installed = [
+    {
+        'address': 'Flat 3 2 Linacre Lane',
+        'postcode': 'L20 5AH',
+        "already_installed": ["windows_glazing"]
+    }
+]
+
+
+def app():
+    """
+    Upload the Vander Elliot non-intrusive survey results and stage the portfolio inputs (asset list and
+    already-installed overrides) in S3, then print the request body that references those files.
+    """
+
+    # Hand-maintained address -> UPRN lookup; in the future, we can just use the ordnance survey api
+    uprn_lookup = [
+        {'House Number': 79,
+         'Address Line 1': 'Clare Road',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 9LZ',
+         'uprn': 41018850},
+        {'House Number': 'Flat 1',
+         'Address Line 1': '2 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41052320},
+        {'House Number': 'Flat 2',
+         'Address Line 1': '2 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41052321},
+        {'House Number': 'Flat 3',
+         'Address Line 1': '2 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41052322},
+        {'House Number': 'Flat 4',
+         'Address Line 1': '2 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41222759},
+        {'House Number': 'Flat 1',
+         'Address Line 1': '4 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41222760},
+        {'House Number': 'Flat 2 (NO ACCESS)',
+         'Address Line 1': '4 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41222761},
+        {'House Number': 'Flat 3',
+         'Address Line 1': '4 Linacre Lane',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L20 5AH',
+         'uprn': 41212534},
+        {'House Number': 'Flat 1 (NO ACCESS)',
+         'Address Line 1': '29 Bedford Road',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L4 5PS',
+         'uprn': 38237316},
+        {'House Number': 'Flat 2 (NO ACCESS)',
+         'Address Line 1': '29 Bedford Road',
+         'Address Line 2': 'Liverpool',
+         'Postcode': 'L4 5PS',
+         'uprn': 38237317}
+    ]
+
+    non_intrusive_s3_filename = (
+        "customers/Vander Elliot/Non-intrusive survey template V2 - Amazon Management Services.xlsx"
+    )
+
+    non_intrusive = UploadNonIntrusives(
+        s3_template_location=non_intrusive_s3_filename,
+        s3_bucket="retrofit-datalake-dev",
+        uprn_lookup=uprn_lookup,
+        survey_date=datetime.strptime('2024-06-21', '%Y-%m-%d')
+    )
+    non_intrusive.upload()
+
+    # The asset list (uprn, "<house number> <address line 1>", postcode) is built from the uprn lookup
+    asset_list = [
+        {
+            "uprn": x["uprn"],
+            "address": f"{x['House Number']} {x['Address Line 1']}",
+            "postcode": x["Postcode"]
+        }
+        for x in uprn_lookup
+    ]
+
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store overrides in s3 (NOTE: written via save_csv_to_s3 despite the .json file name - confirm format)
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(already_installed),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=already_installed_filename
+    )
+
+    # Store the asset list in s3; its key is reused as the trigger_file_path below
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)  # the body is only printed here - this function does not submit it anywhere
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@ -5,7 +5,7 @@ from BaseUtility import Definitions
 from etl.epc.settings import (
    DATA_PROCESSOR_SETTINGS,
    EARLIEST_EPC_DATE,
-    IGNORED_TRANSACTION_TYPES,
+    # IGNORED_TRANSACTION_TYPES,
    IGNORED_FLOOR_LEVELS,
    IGNORED_PROPERTY_TYPES,
    IGNORED_TENURES,
@ -56,8 +56,11 @@ construction_age_remap = {

 expanded_map = {
    i: [
-        label for label, bounds in construction_age_bounds_map.items() if (i <= bounds["u"]) and (i >= bounds['l'])
-    ][0] for i in range(0, 3001)
+        label
+        for label, bounds in construction_age_bounds_map.items()
+        if (i <= bounds["u"]) and (i >= bounds["l"])
+    ][0]
+    for i in range(0, 3001)
 }


@ -74,8 +77,13 @@ class EPCDataProcessor:
    Handle data loading and data preprocessing
    """

-    def __init__(self, data: pd.DataFrame | None = None, cleaning_averages: pd.DataFrame | None = None,
-                 run_mode: str = "training", violation_mode: bool = False) -> None:
+    def __init__(
+        self,
+        data: pd.DataFrame | None = None,
+        cleaning_averages: pd.DataFrame | None = None,
+        run_mode: str = "training",
+        violation_mode: bool = False,
+    ) -> None:
        """
        :param filepath: If specified, is the physical location of the data
        :param is_newdata: Indicates if we are processing new, testing data.
@ -86,7 +94,9 @@ class EPCDataProcessor:
        self.data: pd.DataFrame = data if is_data_a_dataframe else pd.DataFrame()

        is_cleaning_averages_a_dataframe = isinstance(cleaning_averages, pd.DataFrame)
-        self.cleaning_averages: pd.DataFrame = cleaning_averages if is_cleaning_averages_a_dataframe else pd.DataFrame()
+        self.cleaning_averages: pd.DataFrame = (
+            cleaning_averages if is_cleaning_averages_a_dataframe else pd.DataFrame()
+        )

        # FOR NOW IF VIOLATION MODE IS ON, WE USE RUN MODE AS NEWDATA
        self.violation_mode = violation_mode
@ -103,7 +113,9 @@ class EPCDataProcessor:
        ignore_step = True if self.run_mode == "newdata" else False

        if filepath is not None:
-            self.load_data(filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
+            self.load_data(
+                filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"]
+            )

        if len(self.data) == 0:
            raise Exception("No data to process - check filepath/ data being passed in")
@ -121,7 +133,8 @@ class EPCDataProcessor:
        self.clean_multi_glaze_proportion(ignore_step=ignore_step)
        self.clean_photo_supply()
        self.retain_multiple_epc_properties(
-            epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"], ignore_step=ignore_step
+            epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"],
+            ignore_step=ignore_step,
        )

        self.fill_na_fields()
@ -188,7 +201,9 @@ class EPCDataProcessor:
        if ignore_step:
            return

-        self.cleaning_averages["LOCAL_AUTHORITY"] = self.data["LOCAL_AUTHORITY"].values[0]
+        self.cleaning_averages["LOCAL_AUTHORITY"] = self.data["LOCAL_AUTHORITY"].values[
+            0
+        ]

    def fill_invalid_constituency_fields(self, ignore_step: bool = False):
        """
@ -201,7 +216,9 @@ class EPCDataProcessor:
        if ignore_step:
            return

-        self.data = self.data.fillna({"CONSTITUENCY": self.data["CONSTITUENCY"].mode().values[0]})
+        self.data = self.data.fillna(
+            {"CONSTITUENCY": self.data["CONSTITUENCY"].mode().values[0]}
+        )

    def sort_data_by_uprn_lodgement_date(self, ignore_step: bool = False):
        """
@ -301,7 +318,7 @@ class EPCDataProcessor:
        """

        if self.violation_mode:
-            # TODO: to fill in 
+            # TODO: to fill in
            return

        if ignore_step:
@ -311,9 +328,7 @@ class EPCDataProcessor:
            lambda x: self.clean_construction_age_band(x)
        )

-        self.data = self.data[
-            ~pd.isnull(self.data["CONSTRUCTION_AGE_BAND"])
-        ]
+        self.data = self.data[~pd.isnull(self.data["CONSTRUCTION_AGE_BAND"])]

    def clean_missing_rooms(self, ignore_step: bool = False):
        """
@ -331,31 +346,45 @@ class EPCDataProcessor:
            return

        # TODO: DO we want to move this out of this function? (i.e. alter the data before we do any cleaning)
-        self.data["POSTAL_AREA"] = self.data["POSTCODE"].apply(lambda x: x.split(" ")[0])
+        self.data["POSTAL_AREA"] = self.data["POSTCODE"].apply(
+            lambda x: x.split(" ")[0]
+        )

        def apply_clean(data, matching_columns):

-            cleaning_data = data[~pd.isnull(data[col])].groupby(
-                matching_columns
-            )[col].median().reset_index()
-
-            data = data.merge(
-                cleaning_data, how="left", on=matching_columns, suffixes=("", "_CLEANING")
+            cleaning_data = (
+                data[~pd.isnull(data[col])]
+                .groupby(matching_columns)[col]
+                .median()
+                .reset_index()
            )

-            data[col] = np.where(pd.isnull(data[col]), data[f"{col}_CLEANING"], data[col])
+            data = data.merge(
+                cleaning_data,
+                how="left",
+                on=matching_columns,
+                suffixes=("", "_CLEANING"),
+            )
+
+            data[col] = np.where(
+                pd.isnull(data[col]), data[f"{col}_CLEANING"], data[col]
+            )
            data = data.drop(columns=f"{col}_CLEANING")
            return data

        for col in ["NUMBER_HEATED_ROOMS", "NUMBER_HABITABLE_ROOMS"]:

            to_index = 3
-            matching_columns = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "POSTAL_AREA"]
+            matching_columns = [
+                "PROPERTY_TYPE",
+                "BUILT_FORM",
+                "CONSTRUCTION_AGE_BAND",
+                "POSTAL_AREA",
+            ]
            has_missings = pd.isnull(self.data[col]).sum()
            while has_missings:
                self.data = apply_clean(
-                    data=self.data,
-                    matching_columns=matching_columns[0:to_index + 1]
+                    data=self.data, matching_columns=matching_columns[0 : to_index + 1]
                )
                has_missings = pd.isnull(self.data[col]).sum()

@ -363,7 +392,10 @@ class EPCDataProcessor:
                    # Check if we've gotten to index 0 and still have missings - something has gone wrong or
                    # we have a very unique property type
                    if has_missings:
-                        raise NotImplementedError("Handle this edge case, we still have missings for column %s" % col)
+                        raise NotImplementedError(
+                            "Handle this edge case, we still have missings for column %s"
+                            % col
+                        )

                    break
                to_index -= 1
@ -410,7 +442,7 @@ class EPCDataProcessor:
    #     coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.is_newdata else
    #     COLUMNTYPES
    #     for k, v in coltypes.items():
-    #         self.data[k] = self.data[k].astype(v) 
+    #         self.data[k] = self.data[k].astype(v)
    #     self.data = self.data.astype(coltypes)

    #     self.na_remapping()
@ -437,9 +469,11 @@ class EPCDataProcessor:

    def na_remapping(self, auto_subset_columns: bool = False):

-        fill_na_map_apply = {
-            k: v for k, v in fill_na_map.items() if k in self.data.columns
-        } if auto_subset_columns else fill_na_map
+        fill_na_map_apply = (
+            {k: v for k, v in fill_na_map.items() if k in self.data.columns}
+            if auto_subset_columns
+            else fill_na_map
+        )

        for column, fill_value in fill_na_map_apply.items():
            self.data[column] = self.data[column].fillna(fill_value)
@ -535,28 +569,34 @@ class EPCDataProcessor:

        for variable in AVERAGE_FIXED_FEATURES:
            # Replace any missing NAN values with averages for the same Property type and built form
-            cleaning_averages_filled[variable] = cleaning_averages_filled[variable].fillna(
-                cleaning_averages_filled[f"{variable}_AVERAGE"]
-            )
+            cleaning_averages_filled[variable] = cleaning_averages_filled[
+                variable
+            ].fillna(cleaning_averages_filled[f"{variable}_AVERAGE"])

-            cleaning_averages_filled = cleaning_averages_filled.drop(columns=f"{variable}_AVERAGE")
+            cleaning_averages_filled = cleaning_averages_filled.drop(
+                columns=f"{variable}_AVERAGE"
+            )

            #  If there are still NA values i.e. the averages do not have values for a speicifc group of property tyope
            #  and built form
            #  We can use just the property type average and replace

-            cleaning_averages_filled[variable] = cleaning_averages_filled[variable].fillna(
-                cleaning_averages_filled[f"{variable}_PROPERTY_AVERAGE"]
-            )
+            cleaning_averages_filled[variable] = cleaning_averages_filled[
+                variable
+            ].fillna(cleaning_averages_filled[f"{variable}_PROPERTY_AVERAGE"])

-            cleaning_averages_filled = cleaning_averages_filled.drop(columns=f"{variable}_PROPERTY_AVERAGE")
+            cleaning_averages_filled = cleaning_averages_filled.drop(
+                columns=f"{variable}_PROPERTY_AVERAGE"
+            )

            # If there are still NA values, use BUILT FORM averages
-            cleaning_averages_filled["variable"] = cleaning_averages_filled[variable].fillna(
-                cleaning_averages_filled[f"{variable}_BUILT_FORM_AVERAGE"]
-            )
+            cleaning_averages_filled[variable] = cleaning_averages_filled[
+                variable
+            ].fillna(cleaning_averages_filled[f"{variable}_BUILT_FORM_AVERAGE"])

-            cleaning_averages_filled = cleaning_averages_filled.drop(columns=f"{variable}_BUILT_FORM_AVERAGE")
+            cleaning_averages_filled = cleaning_averages_filled.drop(
+                columns=f"{variable}_BUILT_FORM_AVERAGE"
+            )

            # If there still is na values, use average across all epc in consituecy
            cleaning_averages_filled[variable] = cleaning_averages_filled[
@ -573,7 +613,9 @@ class EPCDataProcessor:

        self.cleaning_averages = cleaning_averages_filled

-    def retain_multiple_epc_properties(self, epc_minimum_count: int = 1, ignore_step: bool = False) -> None:
+    def retain_multiple_epc_properties(
+        self, epc_minimum_count: int = 1, ignore_step: bool = False
+    ) -> None:
        """
        Reduce the data futher by keeping only datasets with multiple epcs
        """
@ -592,12 +634,16 @@ class EPCDataProcessor:
        counts = counts[counts["count"] > epc_minimum_count]
        self.data = pd.merge(self.data, counts, on="UPRN")

-    def recast_df_columns(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
+    def recast_df_columns(
+        self, column_mappings: dict, auto_subset_columns: bool = False
+    ) -> None:
        """
        Recast columns from the dataframe to ensure the behaviour we want
        """
        if auto_subset_columns:
-            column_mappings = {k: v for k, v in column_mappings.items() if k in self.data.columns}
+            column_mappings = {
+                k: v for k, v in column_mappings.items() if k in self.data.columns
+            }

        for key, values in column_mappings.items():
            if key not in self.data.columns:
@ -608,13 +654,17 @@ class EPCDataProcessor:
            else:
                self.data[key] = self.data[key].astype(values)

-    def recast_all_data(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
+    def recast_all_data(
+        self, column_mappings: dict, auto_subset_columns: bool = False
+    ) -> None:
        """
        Using a dictionary to recast all columns at once
        """

        if auto_subset_columns:
-            column_mappings = {k: v for k, v in column_mappings.items() if k in self.data.columns}
+            column_mappings = {
+                k: v for k, v in column_mappings.items() if k in self.data.columns
+            }

        self.data = self.data.astype(column_mappings)

@ -625,14 +675,26 @@ class EPCDataProcessor:

        if self.violation_mode:
            violation_uprn_missing = pd.isnull(self.data["UPRN"])
-            violation_old_lodgment_date = self.data["LODGEMENT_DATE"] < EARLIEST_EPC_DATE
-            violation_invalid_transaction_type = self.data["TRANSACTION_TYPE"] == IGNORED_TRANSACTION_TYPES
-            violation_ignored_floor_level = self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
+            violation_old_lodgment_date = (
+                self.data["LODGEMENT_DATE"] < EARLIEST_EPC_DATE
+            )
+            # violation_invalid_transaction_type = self.data["TRANSACTION_TYPE"] == IGNORED_TRANSACTION_TYPES
+            violation_ignored_floor_level = self.data["FLOOR_LEVEL"].isin(
+                IGNORED_FLOOR_LEVELS
+            )
            violation_rdsap_score_above_max = self.data[RDSAP_RESPONSE] > MAX_SAP_SCORE
-            violation_missing_windows_description = pd.isnull(self.data["WINDOWS_DESCRIPTION"])
-            violation_missing_hotwater_description = pd.isnull(self.data["HOTWATER_DESCRIPTION"])
-            violation_missing_roof_description = pd.isnull(self.data["ROOF_DESCRIPTION"])
-            violation_invalid_property_type = self.data["PROPERTY_TYPE"] == IGNORED_PROPERTY_TYPES
+            violation_missing_windows_description = pd.isnull(
+                self.data["WINDOWS_DESCRIPTION"]
+            )
+            violation_missing_hotwater_description = pd.isnull(
+                self.data["HOTWATER_DESCRIPTION"]
+            )
+            violation_missing_roof_description = pd.isnull(
+                self.data["ROOF_DESCRIPTION"]
+            )
+            violation_invalid_property_type = (
+                self.data["PROPERTY_TYPE"] == IGNORED_PROPERTY_TYPES
+            )
            violation_invalid_tenure = self.data["TENURE"].isin(IGNORED_TENURES)

            violation_df = pd.concat(
@ -647,7 +709,8 @@ class EPCDataProcessor:
                    violation_missing_roof_description,
                    violation_invalid_property_type,
                    violation_invalid_tenure,
-                ], axis=1,
+                ],
+                axis=1,
                keys=[
                    "violation_uprn_missing",
                    "violation_old_lodgment_date",
@ -658,8 +721,8 @@ class EPCDataProcessor:
                    "violation_missing_hotwater_description",
                    "violation_missing_roof_description",
                    "violation_invalid_property_type",
-                    "violation_invalid_tenure"
-                ]
+                    "violation_invalid_tenure",
+                ],
            )

            self.data = pd.concat([self.data, violation_df], axis=1)
@ -685,10 +748,8 @@ class EPCDataProcessor:

        self.data = self.data[~pd.isnull(self.data["UPRN"])]
        self.data = self.data[self.data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
-        self.data = self.data[self.data["TRANSACTION_TYPE"] != IGNORED_TRANSACTION_TYPES]
-        self.data = self.data[
-            ~self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
-        ]
+        # self.data = self.data[self.data["TRANSACTION_TYPE"] != IGNORED_TRANSACTION_TYPES]
+        self.data = self.data[~self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)]
        self.data = self.data[self.data[RDSAP_RESPONSE] <= MAX_SAP_SCORE]

        # We observed 7 final records with missing windows and 2 records with missing hot water so we shall remove them
@ -705,7 +766,7 @@ class EPCDataProcessor:
        self.data = self.data[~self.data["TENURE"].isin(IGNORED_TENURES)]

        # We remap zero values to None
-        self.data.loc[self.data['FLOOR_HEIGHT'] == 0, 'FLOOR_HEIGHT'] = None
+        self.data.loc[self.data["FLOOR_HEIGHT"] == 0, "FLOOR_HEIGHT"] = None

    def clean_multi_glaze_proportion(self, ignore_step: bool = False) -> None:
        """
@ -734,7 +795,11 @@ class EPCDataProcessor:

    @staticmethod
    def apply_averages_cleaning(
-        data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False
+        data_to_clean,
+        cleaning_data,
+        cols_to_merge_on,
+        colnames=None,
+        ignore_step: bool = False,
    ):
        """
        Clean the input DataFrame using averages from a cleaning DataFrame.
@ -752,12 +817,13 @@ class EPCDataProcessor:

        # The desired colnames to clean - which may not be present
        if colnames is None:
-            colnames = ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"]
+            colnames = [
+                "TOTAL_FLOOR_AREA",
+                "FLOOR_HEIGHT",
+                "FIXED_LIGHTING_OUTLETS_COUNT",
+            ]

-        cols_to_clean = [
-            c for c in colnames if
-            c in data_to_clean.columns
-        ]
+        cols_to_clean = [c for c in colnames if c in data_to_clean.columns]

        # Enforce data types
        for col in ["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"]:
@ -768,7 +834,15 @@ class EPCDataProcessor:

        # Calculate averages
        cleaning_averages_to_merge = cleaning_data.groupby(columns_to_merge_on).agg(
-            dict(zip(cols_to_clean, ["mean", ] * len(cols_to_clean)))
+            dict(
+                zip(
+                    cols_to_clean,
+                    [
+                        "mean",
+                    ]
+                    * len(cols_to_clean),
+                )
+            )
        )

        # Merge with the original data
@ -777,7 +851,7 @@ class EPCDataProcessor:
            cleaning_averages_to_merge,
            on=columns_to_merge_on,
            suffixes=("", "_AVERAGE"),
-            how='left'
+            how="left",
        )

        global_averages = cleaning_data[cols_to_clean].mean()
@ -806,14 +880,20 @@ class EPCDataProcessor:
            raise Exception("Suffix should be one of _starting or _ending")

        if suffix == "_STARTING":
-            starting_cols = self.data[STARTING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES].copy().add_suffix(suffix)
+            starting_cols = (
+                self.data[STARTING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES]
+                .copy()
+                .add_suffix(suffix)
+            )
            fixed_cols = self.data[NO_SUFFIX_COMPONENT_COLS + POTENTIAL_COLUMNS].copy()

            return pd.concat([starting_cols, fixed_cols], axis=1)

-        return self.data[
-            ENDING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES
-            ].copy().add_suffix(suffix)
+        return (
+            self.data[ENDING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES]
+            .copy()
+            .add_suffix(suffix)
+        )

    def get_fixed_features(self) -> pd.DataFrame:
        """
@ -831,14 +911,17 @@ class EPCDataProcessor:
        :param cols_to_ignore: If specified, is a list of columns to ignore, e.g. uuids
        :return: DataFrame with coerced columns.
        """
-        object_columns = df.select_dtypes(include=['object']).columns
+        object_columns = df.select_dtypes(include=["object"]).columns
        if cols_to_ignore:
            object_columns = [c for c in object_columns if c not in cols_to_ignore]

        for column in object_columns:
            unique_values = df[column].dropna().unique()
            # If the unique values in the column are 'True' and 'False', convert the column to boolean
-            if set(unique_values) == {'True', 'False'} or set(unique_values) == {True, False}:
+            if set(unique_values) == {"True", "False"} or set(unique_values) == {
+                True,
+                False,
+            }:
                df[column] = df[column].astype(bool)

        return df
@ -877,7 +960,6 @@ class EPCDataProcessor:

    @staticmethod
    def clean_efficiency_variables(df):
-
        """
        These is scope to clean this by the model per corresponding description.
        E.g. for WALLS_ENG_EFF we could look at the mode efficiency rating by description and
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
        common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]

        self.df = self.df.loc[
-            :,
-            no_suffix_cols
-            + only_ending_cols
-            + [col for cols in common_cols for col in cols],
-        ]
+                  :,
+                  no_suffix_cols
+                  + only_ending_cols
+                  + [col for cols in common_cols for col in cols],
+                  ]

    def _remove_abnormal_change_in_floor_area(self):
        """
@ -229,7 +229,9 @@ class TrainingDataset(BaseDataset):
        """
        # TODO: move into EPCRecord record
        uvalue_columns = [
-            col for col in self.df.columns if "thermal_transmittance" in col
+            col
+            for col in self.df.columns
+            if "thermal_transmittance" in col and "_unit" not in col
        ]
        for uvalue_col in uvalue_columns:
            self.df[uvalue_col] = pd.to_numeric(self.df[uvalue_col])
@ -509,7 +511,7 @@ class TrainingDataset(BaseDataset):
                    expanded_df["is_sandstone_or_limestone"]
                    == expanded_df["is_sandstone_or_limestone_ending"]
                )
-            ]
+                ]
        elif component == "floor":
            expanded_df = expanded_df[
                (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@ -526,7 +528,7 @@ class TrainingDataset(BaseDataset):
                    expanded_df["is_to_external_air"]
                    == expanded_df["is_to_external_air_ending"]
                )
-            ]
+                ]
        elif component == "roof":
            expanded_df = expanded_df[
                (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@ -539,7 +541,7 @@ class TrainingDataset(BaseDataset):
                    expanded_df["has_dwelling_above"]
                    == expanded_df["has_dwelling_above_ending"]
                )
-            ]
+                ]

        return expanded_df

@ -559,9 +561,9 @@ class TrainingDataset(BaseDataset):
            "walls": [
                # We need to cleaned descriptions for pulling out u-values
                "original_description",
-                "thermal_transmittance_unit",
+                # "thermal_transmittance_unit",
                "original_description_ending",
-                "thermal_transmittance_unit_ending",
+                # "thermal_transmittance_unit_ending",
                "is_cavity_wall_ending",
                "is_solid_brick_ending",
                "is_system_built_ending",
@ -703,6 +705,8 @@ class TrainingDataset(BaseDataset):
                    "insulation_thickness_ending": f"{component}_insulation_thickness_ending",
                    "thermal_transmittance": f"{component}_thermal_transmittance",
                    "thermal_transmittance_ending": f"{component}_thermal_transmittance_ending",
+                    "thermal_transmittance_unit": f"{component}_thermal_transmittance_unit",
+                    "thermal_transmittance_unit_ending": f"{component}_thermal_transmittance_unit_ending",
                    "tariff_type": f"{component}_tariff_type",
                    "tariff_type_ending": f"{component}_tariff_type_ending",
                    "clean_description": f"{component}_clean_description",
@ -738,7 +742,7 @@ class TrainingDataset(BaseDataset):
                self.df[col] = self.df[col].fillna("Unknown")

    def _null_validation(self, information: str):
-        print(f"Null validation after {information}")
+        # print(f"Null validation after {information}")
        if pd.isnull(self.df).sum().sum():
            raise ValueError(f"Null values found in dataset, after step {information}")

--- a/etl/epc/Pipeline.py
+++ b/etl/epc/Pipeline.py
@ -22,6 +22,7 @@ from etl.epc.settings import (
    EFFICIENCY_FEATURES,
    POTENTIAL_COLUMNS,
    ROOM_FEATURES,
+    COST_FEATURES,
 )

 # TODO: change in setting file
@ -39,9 +40,10 @@ VARIABLE_DATA_FEATURES = (
    COMPONENT_FEATURES
    + ROOM_FEATURES
    + EFFICIENCY_FEATURES
-    + POTENTIAL_COLUMNS
+    # + POTENTIAL_COLUMNS
    + ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE]
 )
+COST_FEATURES = [x.lower() for x in COST_FEATURES]


 def get_cleaned_description_mapping():
@ -64,6 +66,17 @@ def get_cleaned_description_mapping():

 clean_lookup = get_cleaned_description_mapping()

+# TODO: THIS IS A TEMPORARY FIX
+new_walls_description_mapping = pd.DataFrame(clean_lookup["walls-description"])
+new_walls_description_mapping.loc[
+    ~new_walls_description_mapping["thermal_transmittance_unit"].isnull(),
+    "thermal_transmittance_unit",
+] = "w/m-¦k"
+
+clean_lookup["walls-description"] = new_walls_description_mapping.to_dict(
+    orient="records"
+)
+

 class EPCPipeline:
    """
@ -267,7 +280,7 @@ class EPCPipeline:

        # We include the lodgement date here as we probably need to factor time into the
        # model, since EPC standards and rigour have changed over time
-        variable_data = property_data[VARIABLE_DATA_FEATURES]
+        variable_data = property_data[VARIABLE_DATA_FEATURES + COST_FEATURES]

        uprn = str(uprn)
        epc_records = [
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@ -76,10 +76,13 @@ class EPCRecord:
    mainheat_energy_eff: str = None
    mainheatc_energy_eff: str = None
    lighting_energy_eff: str = None
-    potential_energy_efficiency: float = None
-    environment_impact_potential: float = None
-    energy_consumption_potential: float = None
-    co2_emissions_potential: float = None
+    lighting_cost_current: float = None
+    heating_cost_current: float = None
+    hot_water_cost_current: float = None
+    # potential_energy_efficiency: float = None
+    # environment_impact_potential: float = None
+    # energy_consumption_potential: float = None
+    # co2_emissions_potential: float = None
    lodgement_date: str = None
    current_energy_efficiency: int = None
    energy_consumption_current: int = None
@ -249,18 +252,21 @@ class EPCRecord:
        self.mainheat_energy_eff: str = self.prepared_epc["mainheat_energy_eff"]
        self.mainheatc_energy_eff: str = self.prepared_epc["mainheatc_energy_eff"]
        self.lighting_energy_eff: str = self.prepared_epc["lighting_energy_eff"]
-        self.potential_energy_efficiency: float = float(
-            self.prepared_epc["potential_energy_efficiency"]
-        )
-        self.environment_impact_potential: float = float(
-            self.prepared_epc["environment_impact_potential"]
-        )
-        self.energy_consumption_potential: float = float(
-            self.prepared_epc["energy_consumption_potential"]
-        )
-        self.co2_emissions_potential: float = float(
-            self.prepared_epc["co2_emissions_potential"]
-        )
+        self.lighting_cost_current: float = self.prepared_epc["lighting_cost_current"]
+        self.heating_cost_current: float = self.prepared_epc["heating_cost_current"]
+        self.hot_water_cost_current: float = self.prepared_epc["hot_water_cost_current"]
+        # self.potential_energy_efficiency: float = float(
+        #     self.prepared_epc["potential_energy_efficiency"]
+        # )
+        # self.environment_impact_potential: float = float(
+        #     self.prepared_epc["environment_impact_potential"]
+        # )
+        # self.energy_consumption_potential: float = float(
+        #     self.prepared_epc["energy_consumption_potential"]
+        # )
+        # self.co2_emissions_potential: float = float(
+        #     self.prepared_epc["co2_emissions_potential"]
+        # )
        self.lodgement_date: str = self.prepared_epc["lodgement_date"]
        self.current_energy_efficiency: int = int(
            self.prepared_epc["current_energy_efficiency"]
@ -466,9 +472,7 @@ class EPCRecord:
            (property_dimensions["PROPERTY_TYPE"] == self.prepared_epc["property-type"])
        ]

-        if (
-            self.construction_age_band not in DATA_ANOMALY_MATCHES
-        ):
+        if self.construction_age_band not in DATA_ANOMALY_MATCHES:
            result = result[
                (result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)
            ]
@ -480,7 +484,12 @@ class EPCRecord:
            result = result[(result["BUILT_FORM"] == self.prepared_epc["built-form"])]

        return result[
-            ["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]
+            [
+                "NUMBER_HABITABLE_ROOMS",
+                "NUMBER_HEATED_ROOMS",
+                "TOTAL_FLOOR_AREA",
+                "FLOOR_HEIGHT",
+            ]
        ].mean()

    def _clean_property_dimensions(self):
@ -491,9 +500,11 @@ class EPCRecord:
        if not self.prepared_epc:
            raise ValueError("EPC Record doesn not contain epc data")

-        if (self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES) or (
-            self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES
-        ) or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES):
+        if (
+            (self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES)
+            or (self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES)
+            or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES)
+        ):
            property_dimensions = read_dataframe_from_s3_parquet(
                bucket_name=DATA_BUCKET,
                file_key=f"property_dimensions/{self.prepared_epc['local-authority']}.parquet",
@ -507,12 +518,18 @@ class EPCRecord:
                self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round()
            )
        else:
-            self.prepared_epc["number-habitable-rooms"] = float(self.prepared_epc["number-habitable-rooms"])
+            self.prepared_epc["number-habitable-rooms"] = float(
+                self.prepared_epc["number-habitable-rooms"]
+            )

        if self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES:
-            self.prepared_epc["number-heated-rooms"] = float(self.property_dimensions["NUMBER_HEATED_ROOMS"].round())
+            self.prepared_epc["number-heated-rooms"] = float(
+                self.property_dimensions["NUMBER_HEATED_ROOMS"].round()
+            )
        else:
-            self.prepared_epc["number-heated-rooms"] = float(self.prepared_epc["number-heated-rooms"])
+            self.prepared_epc["number-heated-rooms"] = float(
+                self.prepared_epc["number-heated-rooms"]
+            )

        self.number_of_floors = estimate_number_of_floors(
            self.prepared_epc["property-type"]
@ -1033,18 +1050,24 @@ class EPCDifferenceRecord:
            "heat_demand_ending": self.record2.get(HEAT_DEMAND_RESPONSE),
            "carbon_starting": self.record1.get(CARBON_RESPONSE),
            "carbon_ending": self.record2.get(CARBON_RESPONSE),
-            "potential_energy_efficiency": self.earliest_record.get(
-                "potential_energy_efficiency"
-            ),
-            "environment_impact_potential": self.earliest_record.get(
-                "environment_impact_potential"
-            ),
-            "energy_consumption_potential": self.earliest_record.get(
-                "energy_consumption_potential"
-            ),
-            "co2_emissions_potential": self.earliest_record.get(
-                "co2_emissions_potential"
-            ),
+            "lighting_cost_starting": self.record1.get("lighting_cost_current"),
+            "lighting_cost_ending": self.record2.get("lighting_cost_current"),
+            "heating_cost_starting": self.record1.get("heating_cost_current"),
+            "heating_cost_ending": self.record2.get("heating_cost_current"),
+            "hot_water_cost_starting": self.record1.get("hot_water_cost_current"),
+            "hot_water_cost_ending": self.record2.get("hot_water_cost_current"),
+            # "potential_energy_efficiency": self.earliest_record.get(
+            #     "potential_energy_efficiency"
+            # ),
+            # "environment_impact_potential": self.earliest_record.get(
+            #     "environment_impact_potential"
+            # ),
+            # "energy_consumption_potential": self.earliest_record.get(
+            #     "energy_consumption_potential"
+            # ),
+            # "co2_emissions_potential": self.earliest_record.get(
+            #     "co2_emissions_potential"
+            # ),
            **ending_record,
            **starting_record,
        }
--- a/etl/epc/generate_scenarios_data.py
+++ b/etl/epc/generate_scenarios_data.py
@ -41,6 +41,15 @@ cleaning_data = read_dataframe_from_s3_parquet(
 materials = get_materials(session)
 cleaned = get_cleaned()

+# TODO: THIS IS A TEMPORARY FIX
+new_walls_description_mapping = pd.DataFrame(cleaned["walls-description"])
+new_walls_description_mapping.loc[
+    ~new_walls_description_mapping["thermal_transmittance_unit"].isnull(),
+    "thermal_transmittance_unit",
+] = "w/m-¦k"
+
+cleaned["walls-description"] = new_walls_description_mapping.to_dict(orient="records")
+
 uprn_filenames = read_dataframe_from_s3_parquet(
    bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
 )
@ -167,7 +176,7 @@ for scenario_property in scenario_properties:
    p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

    recommender = Recommendations(property_instance=p, materials=materials)
-    property_recommendations = recommender.recommend("0")
+    property_recommendations = recommender.recommend()

    wall_recommendations = recommender.wall_recomender.recommendations
    loft_recommendations = recommender.roof_recommender.recommendations
--- a/etl/epc/settings.py
+++ b/etl/epc/settings.py
@ -110,6 +110,12 @@ DEPLOYMENT_FOLDER = "deployment"
 TOTAL_FLOOR_AREA_NATIONAL_AVERAGE = 70
 FLOOR_HEIGHT_NATIONAL_AVERAGE = 2.45

+COST_FEATURES = [
+    "LIGHTING_COST_CURRENT",
+    "HEATING_COST_CURRENT",
+    "HOT_WATER_COST_CURRENT",
+]
+
 AVERAGE_FIXED_FEATURES = [
    "TOTAL_FLOOR_AREA",
    "FLOOR_HEIGHT",
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@ -39,11 +39,8 @@ def app():
    cleaned_data = {}
    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]

-    WALLS = []
    for directory in tqdm(epc_directories):
        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
-        z = data["WALLS_DESCRIPTION"].unique().tolist()
-        WALLS.extend(z)
        # Rename the columns to the same format as the api returns
        data.columns = [c.replace("_", "-").lower() for c in data.columns]
        # Take just date before the date threshold
--- a/etl/epc_clean/epc_attributes/FloorAttributes.py
+++ b/etl/epc_clean/epc_attributes/FloorAttributes.py
@ -38,7 +38,7 @@ class FloorAttributes(Definitions):
        self.description: str = description.lower()

        self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or (
-            description in self.OBSERVED_ERRORS)
+            description in self.OBSERVED_ERRORS) or (self.description == "sap05:floor")

        # Try and perform a translation, incase it's in welsh
        self.translate_welsh_text()
--- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py
+++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
@ -129,7 +129,9 @@ class HotWaterAttributes(Definitions):
    def __init__(self, description: str):
        self.description: str = clean_description(description.lower()).strip()

-        self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or (
+            self.description == "sap05 hot-water"
+        )

        translation = self.WELSH_TEXT.get(self.description)

--- a/etl/epc_clean/epc_attributes/LightingAttributes.py
+++ b/etl/epc_clean/epc_attributes/LightingAttributes.py
@ -1,15 +1,18 @@
 import re
+from BaseUtility import Definitions
 from etl.epc_clean.epc_attributes.attribute_utils import clean_description
 from etl.epc_clean.utils import correct_spelling


-class LightingAttributes:
+class LightingAttributes(Definitions):
    WELSH_TEXT = {
        "goleuadau ynni-isel ym mhob un ogçör mannau gosod": "low energy lighting in all fixed outlets",
        "dim goleuadau ynni-isel": "no low energy lighting",
        "goleuadau ynni-isel ym mhob un o'r mannau gosod": 'Low energy lighting in all fixed outlets'
    }

+    OBSERVED_ERRORS = []
+
    def __init__(self, description, averages):
        self.description: str = clean_description(description.lower())

@ -18,6 +21,9 @@ class LightingAttributes:
        self.description = correct_spelling(self.description)
        self.averages = averages

+        self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or (
+            description in self.OBSERVED_ERRORS) or (description == "SAP05:Lighting")
+
    def welsh_translation_search(self):
        """
        For welsh text describing the percentage of low energy lighting, we match the regular
@ -40,6 +46,9 @@ class LightingAttributes:

        description = self.description

+        if self.nodata:
+            return {"low_energy_proportion": None}
+
        if 'no low energy lighting' in description:
            return {"low_energy_proportion": 0}

--- a/etl/epc_clean/epc_attributes/MainheatAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py
@ -77,7 +77,9 @@ class MainHeatAttributes(Definitions):

        self.description: str = clean_description(self.description).strip()
        # Remove special characters
-        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or (
+            description == "SAP05:Main-Heating"
+        )

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
@ -97,11 +99,12 @@ class MainHeatAttributes(Definitions):

        self.process_edge_cases()

-        if (not description or not any(
-            rt in self.description for rt in
-            self.HEAT_SYSTEMS + self.FUEL_TYPES + self.DISTRIBUTION_SYSTEMS + self.OTHERS
-        ) and not self.is_edge_case):
-            raise ValueError('Invalid description')
+        if not self.nodata:
+            if (not description or not any(
+                rt in self.description for rt in
+                self.HEAT_SYSTEMS + self.FUEL_TYPES + self.DISTRIBUTION_SYSTEMS + self.OTHERS
+            ) and not self.is_edge_case):
+                raise ValueError('Invalid description')

    def process_edge_cases(self) -> (dict, bool):
        """
--- a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py
@ -117,7 +117,9 @@ class MainheatControlAttributes(Definitions):

    def __init__(self, description: str):
        self.description: str = clean_description(description.lower()).strip()
-        self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or (
+            description == "SAP05:Main-Heating-Controls"
+        )

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
--- a/etl/epc_clean/epc_attributes/RoofAttributes.py
+++ b/etl/epc_clean/epc_attributes/RoofAttributes.py
@ -45,7 +45,7 @@ class RoofAttributes(Definitions):
        """

        self.description: str = description.lower().strip()
-        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or self.description == "sap05:roof"

        self.welsh_translation_search()

--- a/etl/epc_clean/epc_attributes/WallAttributes.py
+++ b/etl/epc_clean/epc_attributes/WallAttributes.py
@ -75,12 +75,19 @@ class WallAttributes(Definitions):
        'insulation_thickness', 'external_insulation', 'internal_insulation'
    ]

+    CORRECTIONS = {
+        "Granite or whin, as built, no insulation (assumed)": "Granite or whinstone, as built, no insulation (assumed)",
+    }
+
    def __init__(self, description: str):
        """
        :param description: Description of the walls.
        """
        self.description: str = description

+        if self.description in self.CORRECTIONS:
+            self.description = self.CORRECTIONS[self.description]
+
        self.welsh_translation_search()

        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
--- a/etl/epc_clean/epc_attributes/WindowAttributes.py
+++ b/etl/epc_clean/epc_attributes/WindowAttributes.py
@ -38,7 +38,7 @@ class WindowAttributes(Definitions):

        # In the case of an empty description, we want to return a dictionary with all values set to False
        # and indicate there was no data
-        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
+        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or description == "SAP05:Windows"

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
--- a/etl/epc_clean/epc_attributes/attribute_utils.py
+++ b/etl/epc_clean/epc_attributes/attribute_utils.py
@ -2,8 +2,8 @@ import re
 import string
 from typing import Tuple, Union, Dict, List

-THERMAL_TRANSMITTANCE_STR = r"average thermal transmittance (-?\d+(\.\d+)?)\s(w/m\S+k)"
-THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTANCE_STR)
+THERMAL_TRANSMITTANCE_STR = r"average thermal transmittance\s*[=:-]?\s*(-?\d+(\.\d+)?)\s*[wW]/m\S*[kK]"
+THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTANCE_STR, re.IGNORECASE)

 DOUBLE_SPACE_PATTERN = re.compile(r"\s+")

--- a/etl/non_intrusive_surveys/photos/README.md
+++ b/etl/non_intrusive_surveys/photos/README.md
@ -15,5 +15,5 @@ pip install -r requirements.txt
 The main application is found in the UploadNonIntrusives.py file. To run the application, use the following command:

 ```bash
-python app.py
+python UploadNonIntrusives.py
 ```
--- a/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
+++ b/etl/non_intrusive_surveys/upload/UploadNonIntrusives.py
@ -0,0 +1,149 @@
+from utils.s3 import read_excel_from_s3
+from utils.logger import setup_logger
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes
+
+logger = setup_logger()
+
+
+class UploadNonIntrusives:
+    """
+    This class handles the upload of findings from the non-intrusive surveys, to the database
+    """
+
+    COLUMN_PREFIXES: dict = {
+        'Surveyor First Name': 'Surveyor',
+        'Surveyor Last Name': 'Surveyor',
+        'House Number': 'Property Details',
+        'Address Line 1': 'Property Details',
+        'Address Line 2': 'Property Details',
+        'Postcode': 'Property Details',
+        'Property Year Built': 'Property Details',
+        'Wall Construction': 'Walls',
+        'Wall Construction Notes': 'Walls',
+        'Existing insulation?': 'Walls',
+        'Retro Drilled?': 'Walls',
+        'Condition (cracks & damp)': 'Walls',
+        'Condition Notes': 'Walls',
+        'Alternative walls': 'Walls',
+        'Alternative walls percentage': 'Walls',
+        'Adequate Ventilation?': 'Walls',
+        'Ventilation notes': 'Walls',
+        'Party wall': 'Walls',
+        'Floor Type': 'Floor',
+        'Wall render': 'Wall Render',
+        'Wall Render Condition': 'Wall Render',
+        'Roof Type': 'Roof',
+        'Roof insulation ': 'Roof',
+        'Roof Condition': 'Roof',
+        'Obvious Roof Shading': 'Roof',
+        'Roof orientation - Primary': 'Roof',
+        'Roof orientation - Secondary': 'Roof',
+        'Obstructions on the roof': 'Roof',
+        'Flue type': 'Heating',
+        'Is there an extension?': 'Access',
+        'Are there any out-buildings?': 'Access',
+        'Is there a conservatory?': 'Access',
+        'Is the property straight onto a footpath?': 'Access',
+        'Is there a requirement for planning consent for works?': 'Access',
+        'Is there space for an external unit?': 'Air Source Heat Pump',
+        'Could a cylinder fit in the loft?': 'Air Source Heat Pump',
+        'Are there obvious areas of heat loss from the walls?': 'Thermography',
+        'Are there obvious areas of heat loss from the roof?': 'Thermography',
+        'Does the existing insulation exhibit signs of inconsistent performance or underperformance?': 'Thermography',
+        'Is there excessive levels of heat loss from windows?': 'Thermography',
+        'Is there excessive levels of heat loss from doors?': 'Thermography',
+        'Material inside the walls': 'Borescope Test',
+        'Cavity depth (mm)': 'Borescope Test',
+        'Is there rubble in the cavity?': 'Borescope Test',
+        'Wall tie type': 'Borescope Test',
+        'Wall tie integrity': 'Borescope Test',
+        'Inner block work': 'Borescope Test',
+        'Current glazing': 'Windows',
+        'Windows Age (pre/post 2002)': 'Windows',
+        'Glazing gap': 'Windows',
+        'Are there obvious trickle vents in the windows?': 'Windows',
+        'Is there sufficient space in the garden?': 'Ground Source Heat Pump',
+        'Does the property need a CIGA check?': 'Funding',
+        'Is the property eligible for GBIS?': 'Funding',
+        'Is the property eligible for ECO4?': 'Funding',
+        'Is the property eligible for the Local Authority Flex Scheme?': 'Funding',
+        'Is the property eligible for HUG?': 'Funding',
+        'Is the property eligible for LAD?': 'Funding',
+        'Other funding recommendations': 'Funding'
+    }
+
+    def __init__(self, s3_template_location, s3_bucket, uprn_lookup, survey_date):
+        self.s3_template_location = s3_template_location
+        self.s3_bucket = s3_bucket
+        self.template = self.read_template()
+
+        self.uprn_lookup = uprn_lookup
+        self.survey_date = survey_date
+
+    def read_template(self):
+        """
+        This method reads the template from S3
+        """
+        return read_excel_from_s3(file_key=self.s3_template_location, bucket_name=self.s3_bucket, header_row=2)
+
+    def upload(self):
+        """
+        This method uploads the non-intrusive survey data to the database
+        """
+
+        if self.uprn_lookup is None:
+            raise Exception("Implement call to ordnance survey to get uprn lookup data")
+
+        logger.info("Preparing non-intrusive notes")
+        non_intrusives = self.template.to_dict(orient="records")
+
+        non_invasive_notes = []
+        for survey in non_intrusives:
+            # Remove any NAN entries
+            survey_clean = {self.COLUMN_PREFIXES[k] + ": " + k: v for k, v in survey.items() if v == v}
+
+            uprn_data = [
+                x for x in self.uprn_lookup if (
+                    str(x['House Number']).strip() == str(survey_clean['Property Details: House Number']).strip() and
+                    x['Address Line 1'] == survey_clean['Property Details: Address Line 1'].strip() and
+                    x['Address Line 2'] == survey_clean['Property Details: Address Line 2'].strip() and
+                    x['Postcode'] == survey_clean['Property Details: Postcode'].strip()
+                )
+            ]
+            if len(uprn_data) != 1:
+                address = (
+                    str(survey_clean['Property Details: House Number']) + ' ' +
+                    survey_clean['Property Details: Address Line 1'] + ' ' +
+                    survey_clean['Property Details: Address Line 2'] + ' ' +
+                    survey_clean['Property Details: Postcode']
+                )
+                raise Exception(f"Failed to find UPRN data for {address}")
+
+            surveyor = (
+                survey_clean.pop("Surveyor: Surveyor First Name") + " " +
+                survey_clean.pop("Surveyor: Surveyor Last Name")
+            )
+
+            # Include all of the information apart from data that includes the Property details prefix and the
+            # surveyor - we do however include Property Details: Property Year Built
+            notes_to_upload = {
+                k: v for k, v in survey_clean.items() if k == "Property Details: Property Year Built" or (
+                    not k.startswith("Property Details") and
+                    not k.startswith("Surveyor")
+                )
+            }
+
+            non_invasive_notes.append({
+                "uprn": uprn_data[0]['uprn'],
+                "surveyor": surveyor,
+                "survey_date": self.survey_date,
+                **notes_to_upload
+            })
+
+        # Implement call to upload notes_to_upload to the database
+        logger.info("Uploading non-intrusive notes to the database")
+
+        session = sessionmaker(bind=db_engine)()
+        upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)
--- a/etl/non_intrusive_surveys/upload/init.py
+++ b/etl/non_intrusive_surveys/upload/init.py
--- a/etl/property_valuation/requirements.txt
+++ b/etl/property_valuation/requirements.txt
@ -0,0 +1,7 @@
+seleniumbase
+beautifulsoup4
+requests
+pandas
+tqdm
+openpyxl
+undetected_chromedriver
--- a/etl/property_valuation/scrape_valuations.py
+++ b/etl/property_valuation/scrape_valuations.py
@ -0,0 +1,88 @@
+import requests
+import random
+import time
+import pandas as pd
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+from seleniumbase import Driver
+from seleniumbase import page_actions
+
+import undetected_chromedriver as webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+import time
+import pandas as pd
+
+BASE_URL = "https://www.zoopla.co.uk/property/uprn/{uprn}/"
+
+
+def initialize_driver():
+    driver = Driver(headless=True, uc=True)  # Set headless to True if you want headless mode
+    return driver
+
+
+def app():
+    # Read in the starting asset list
+    asset_list = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx")
+    asset_list = asset_list[["UPRN", "ADDRESS", "POSTCODE"]]
+
+    # asset_list.to_excel("property value.xlsx", index=False)
+
+    # Generate the list of urls
+    urls = [BASE_URL.format(uprn=uprn) for uprn in asset_list["UPRN"]]
+
+    driver = webdriver.Chrome()
+
+    driver = initialize_driver()
+    driver.set_page_load_timeout(30)  # Increase page load timeout
+
+    result = []
+    for i, (url, uprn) in tqdm(enumerate(zip(urls, asset_list["UPRN"].tolist())), total=len(urls)):
+
+        # Every 10 requests sleep for an extra 7 seconds
+        if len(result) % 10 == 0 and len(result) != 0:
+            time.sleep(7)
+
+        try:
+
+            driver.get(url)
+            page_actions.wait_for_element_visible(driver, "p[data-testid='estimate-blurred']", timeout=30)
+
+            price_element = driver.find_element("css selector", "p[data-testid='estimate-blurred']")
+            price = price_element.get_text(strip=True)
+
+            low_price_element = driver.find_element("css selector", "span[data-testid='low-estimate-blurred']")
+            low_price = low_price_element.get_text(strip=True)
+
+            high_price_element = driver.find_element("css selector", "span[data-testid='high-estimate-blurred']")
+            high_price = high_price_element.get_text(strip=True)
+
+            result.append(
+                {
+                    "UPRN": uprn,
+                    "price": price,
+                    "lower_estimate": low_price,
+                    "upper_estimate": high_price
+                }
+            )
+
+            # Sleep a random amount of time between 5 and 20 seconds
+            sleep_time = 5 + (15 * random.random())
+            time.sleep(sleep_time)
+
+        except Exception as e:
+            print(f"Failed to retrieve data for UPRN {uprn} at iteration {i}: {e}")
+
+    # Store the result depending on where we are
+    savepoint = pd.DataFrame(result)
+    savepoint.to_csv(f"savepoint_index_{i}.csv", index=False)
+
+    # TODO: Testing Jina AI - didn't work but maybe one of the alternatives might work:
+    #       https://www.youtube.com/watch?v=QxHE4af5BQE
+    response = requests.get("https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/41222761/")
+    response.text
+
+
+if __name__ == "__main__":
+    app()
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@ -49,30 +49,30 @@ resource "aws_security_group" "allow_db" {

  ingress {
    # TLS (change to whatever ports you need)
-    from_port   = 5432
-    to_port     = 5432
-    protocol    = "tcp"
+    from_port = 5432
+    to_port   = 5432
+    protocol  = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  egress {
-    from_port   = 0
-    to_port     = 0
-    protocol    = "-1"
+    from_port = 0
+    to_port   = 0
+    protocol  = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
 }

 resource "aws_db_instance" "default" {
-  allocated_storage      = var.allocated_storage
-  engine                 = "postgres"
-  engine_version         = "14.10"
-  instance_class         = var.instance_class
-  db_name                = var.database_name
-  username               = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]
-  password               = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_password"]
-  parameter_group_name   = "default.postgres14"
-  skip_final_snapshot    = true
+  allocated_storage    = var.allocated_storage
+  engine               = "postgres"
+  engine_version       = "14.10"
+  instance_class       = var.instance_class
+  db_name              = var.database_name
+  username             = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]
+  password             = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_password"]
+  parameter_group_name = "default.postgres14"
+  skip_final_snapshot  = true
  vpc_security_group_ids = [aws_security_group.allow_db.id]
  lifecycle {
    prevent_destroy = true
@ -187,6 +187,22 @@ module "lambda_heat_prediction_ecr" {
  source   = "./modules/ecr"
 }

+# ECR repos for lighting cost, heating cost and hot water cost models
+# Each repo is declared through the shared ./modules/ecr module and its name is suffixed with the deployment stage
+
+# Repository for the lighting cost prediction model
+module "lambda_lighting_cost_prediction_ecr" {
+  ecr_name = "lighting-cost-prediction-${var.stage}"
+  source   = "./modules/ecr"
+}
+
+# Repository for the heating cost prediction model
+module "lambda_heating_cost_prediction_ecr" {
+  ecr_name = "heating-cost-prediction-${var.stage}"
+  source   = "./modules/ecr"
+}
+
+# Repository for the hot water cost prediction model
+module "lambda_hot_water_cost_prediction_ecr" {
+  ecr_name = "hot-water-cost-prediction-${var.stage}"
+  source   = "./modules/ecr"
+}
+
 ##############################################
 # CDN - Cloudfront
 ##############################################
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@ -20,21 +20,21 @@ regional_labour_variations = [

 # This data is based on the MCS database
 MCS_SOLAR_PV_COST_DATA = {
-    "last_updated": "2024-01-04",
-    "average_cost_per_kwh": 2013.94,
-    "average_cost_per_kwh-Outer London": 2618.75,
-    "average_cost_per_kwh-Inner London": 2618.75,
-    "average_cost_per_kwh-South East England": 2083.33,
-    "average_cost_per_kwh-South West England": 2113,
-    "average_cost_per_kwh-East of England": 1973.86,
-    "average_cost_per_kwh-East Midlands": 1981.86,
-    "average_cost_per_kwh-West Midlands": 1926.55,
-    "average_cost_per_kwh-North East England": 2028.49,
-    "average_cost_per_kwh-North West England": 1620.42,
-    "average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
-    "average_cost_per_kwh-Wales": 1898.83,
-    "average_cost_per_kwh-Scotland": 1967.97,
-    "average_cost_per_kwh-Northern Ireland": 2126.09,
+    "last_updated": "2024-06-10",
+    "average_cost_per_kwh": 1750,
+    "average_cost_per_kwh-Outer London": 1776,
+    "average_cost_per_kwh-Inner London": 1776,
+    "average_cost_per_kwh-South East England": 1672,
+    "average_cost_per_kwh-South West England": 1732,
+    "average_cost_per_kwh-East of England": 1721,
+    "average_cost_per_kwh-East Midlands": 1730,
+    "average_cost_per_kwh-West Midlands": 1761,
+    "average_cost_per_kwh-North East England": 1669,
+    "average_cost_per_kwh-North West England": 1764,
+    "average_cost_per_kwh-Yorkshire and the Humber": 1705,
+    "average_cost_per_kwh-Wales": 1896,
+    "average_cost_per_kwh-Scotland": 1767,
+    "average_cost_per_kwh-Northern Ireland": 1767,
 }

 # This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average,
@ -104,9 +104,9 @@ DOUBLE_RADIATOR_COST = 300
 FLUE_COST = 600
 PIPEWORK_COST = 750  # Min cost is £500

-# This is the cost per meter squared for cavity extraction
-# https://www.checkatrade.com/blog/cost-guides/cavity-wall-insulation-removal-cost/
-CAVITY_EXTRACTION_COST = 21.5
+# Based on SCIS figures
+# TODO: Add this to the database
+CAVITY_EXTRACTION_COST = 25


 class Costs:
@ -203,6 +203,20 @@ class Costs:
        :return: A dictionary containing detailed cost breakdown.
        """

+        # CWI usually takes 1 day
+        labour_hours = 8
+        labour_days = 1
+
+        # if the material is based on an installer cost, we return the flat price
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * wall_area
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
        material_cost_per_m2 = material["material_cost"]

        base_material_cost = material_cost_per_m2 * wall_area
@ -220,11 +234,6 @@ class Costs:

        total_cost = subtotal_before_vat + vat_cost

-        labour_hours = material["labour_hours_per_unit"] * wall_area
-
-        # Assume a team of 2
-        labour_days = (labour_hours / 8) / 2
-
        if is_extraction_and_refill:
            # bump up the cost of the work
            total_cost = total_cost + CAVITY_EXTRACTION_COST * wall_area
@ -252,6 +261,20 @@ class Costs:

        :return: A dictionary containing detailed cost breakdown.
        """
+
+        labour_hours = material["labour_hours_per_unit"] * floor_area
+        # Assume a team of 1 person
+        labour_days = labour_hours / 8
+
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * floor_area
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
        material_cost_per_m2 = material["material_cost"]

        # We inflate material costs due to recent price increases
@ -273,11 +296,6 @@ class Costs:

        total_cost = subtotal_before_vat + vat_cost

-        labour_hours = material["labour_hours_per_unit"] * floor_area
-
-        # Assume a team of 1 person
-        labour_days = labour_hours / 8
-
        return {
            "total": total_cost,
            "subtotal": subtotal_before_vat,
@ -314,6 +332,22 @@ class Costs:
        :return:
        """

+        # if the material is based on an installer cost, we return the flat price
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * wall_area
+
+            labour_hours = material["labour_hours_per_unit"] * wall_area
+
+            # To install internal wall insulation, a small to medium size project might be conducted by a team of 3-5
+            # people
+            labour_days = (labour_hours / 8) / 4
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
        # Extract and check the different types of data we'll need
        demolition_data = [x for x in non_insulation_materials if x["type"] == "iwi_wall_demolition"]
        vapour_barrier_data = [x for x in non_insulation_materials if x["type"] == "iwi_vapour_barrier"]
@ -398,6 +432,21 @@ class Costs:
        :return:
        """

+        # if the material is based on an installer cost, we return the flat price
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * insulation_floor_area
+
+            labour_hours = material["labour_hours_per_unit"] * insulation_floor_area
+            # To install suspended floor insulation, a small to medium size project might be conducted by a team of 3
+            # people
+            labour_days = (labour_hours / 8) / 3
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
        demolition_data = [x for x in non_insulation_materials if x["type"] == "suspended_floor_demolition"]
        vapour_barrier_data = [x for x in non_insulation_materials if x["type"] == "suspended_floor_vapour_barrier"]
        redecoration_data = [x for x in non_insulation_materials if x["type"] == "suspended_floor_redecoration"]
@ -500,6 +549,21 @@ class Costs:
        :return:
        """

+        # if the material is based on an installer cost, we return the flat price
+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * insulation_floor_area
+
+            labour_hours = material["labour_hours_per_unit"] * insulation_floor_area
+            # To install solid floor insulation, a small to medium size project might be conducted by a team of 3
+            # people
+            labour_days = (labour_hours / 8) / 3
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
        demolition_data = [x for x in non_insulation_materials if x["type"] == "solid_floor_demolition"]
        preparation_data = [x for x in non_insulation_materials if x["type"] == "solid_floor_preparation"]
        vapour_barrier_data = [x for x in non_insulation_materials if x["type"] == "solid_floor_vapour_barrier"]
@ -619,6 +683,24 @@ class Costs:
        :return:
        """

+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * wall_area
+            # Add on a buffer for scaffolding
+            if self.property.data["property-type"] == "House":
+                total_cost += self.EWI_SCAFFOLDING_PRELIMINARIES * total_cost
+
+            labour_hours = material["labour_hours_per_unit"] * wall_area
+
+            # To install external wall insulation, a small to medium size project might be conducted by a team of 3-5
+            # people
+            labour_days = (labour_hours / 8) / 4
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
        # For semi detatched and detatched houses, as well as maisonettes, we price for scaffolding

        if self.property.data["property-type"] == "House":
@ -872,6 +954,19 @@ class Costs:

        """

+        if material["is_installer_quote"]:
+            total_cost = material["total_cost"] * number_of_windows
+
+            labour_hours = material["labour_hours_per_unit"] * number_of_windows
+            # To install windows, a small to medium size project might be conducted by a team of 2-3 people
+            labour_days = (labour_hours / 8) / 2
+
+            return {
+                "total": total_cost,
+                "labour_hours": labour_hours,
+                "labour_days": labour_days,
+            }
+
        material_cost = material["material_cost"] * number_of_windows

        labour_cost = (
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@ -8,6 +8,14 @@ from recommendations.HeatingControlRecommender import HeatingControlRecommender


 class HeatingRecommender:
+    ELECTRIC_HEATING_DESCRIPTIONS = [
+        "Room heaters, electric",
+        "Electric storage heaters",
+        "Electric storage heaters, radiators",
+        "Portable electric heaters assumed for most rooms",
+    ]
+
+    high_heat_retention_contols_desc = "Controls for high heat retention storage heaters"

    def __init__(self, property_instance: Property):
        self.property = property_instance
@ -16,6 +24,24 @@ class HeatingRecommender:
        self.heating_recommendations = []
        self.heating_control_recommendations = []

+        self.has_electric_heating_description = (
+            self.property.main_heating["clean_description"] in self.ELECTRIC_HEATING_DESCRIPTIONS
+        )
+
+    def is_high_heat_retention_valid(self):
+        """
+        Decide whether high heat retention (HHR) storage heaters can be considered for this property.
+
+        :return: True when the property was flagged as having an electric heating description, or when no heating
+            system is present and electric heaters are assumed; False otherwise.
+        """
+
+        current_description = self.property.main_heating["clean_description"]
+
+        # Assumed electric heating qualifies for hhr storage heaters, regardless of whether or not the property
+        # has a mains connection
+        assumed_electric_descriptions = ["No system present, electric heaters assumed"]
+        is_assumed_electric = current_description in assumed_electric_descriptions
+
+        return self.has_electric_heating_description or is_assumed_electric
+
    def recommend(self, has_cavity_or_loft_recommendations, phase=0):
        """
        Produces heating recommendations
@ -34,17 +60,10 @@ class HeatingRecommender:
        # This first iteration of the recommender will provide very basic recommendation
        # We recommend heating controls based on the main heating system

-        has_electric_heating_description = self.property.main_heating["clean_description"] in [
-            "Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators"
-        ]
-
-        no_heating_no_mains = (
-            self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"] and
-            not self.property.data["mains-gas-flag"]
-        )
-
-        if has_electric_heating_description or no_heating_no_mains:
+        if self.is_high_heat_retention_valid():
            # Recommend high heat retention storage heaters
+            # TODO: We need to allow for the possibility that the property already has storage heaters, but just
+            #       needs the controls
            self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)

        # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
@ -61,7 +80,7 @@ class HeatingRecommender:
        )

        # We also check if the property has electric heating, but it has access to the mains gas
-        electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"]
+        electic_heating_has_mains = self.has_electric_heating_description and self.property.data["mains-gas-flag"]

        portable_heaters_has_mains = (
            self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"] and
@ -93,16 +112,19 @@ class HeatingRecommender:
        # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
        # and either allow or prevent the recommendation of an air source heat pump

-        suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
-        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
-
-        if suitable_property_type and not has_air_source_heat_pump:
+        if self.is_ashp_valid():
            self.recommend_air_source_heat_pump(
                phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
            )

        return

+    def is_ashp_valid(self):
+        """
+        Check whether an air source heat pump can be recommended for this property.
+
+        :return: True when the property type is "House" or "Bungalow" and the main heating is not already an air
+            source heat pump; False otherwise.
+        """
+        property_type = self.property.data["property-type"]
+        already_has_ashp = self.property.main_heating["has_air_source_heat_pump"]
+
+        # Only houses and bungalows are treated as suitable property types
+        if property_type not in ("House", "Bungalow"):
+            return False
+
+        return not already_has_ashp
+
    def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False):
        """
        This method will implement the recommendation for an air source heat pump
@ -314,6 +336,27 @@ class HeatingRecommender:

        return output

+    def is_hhr_already_installed(self):
+        """
+        Check if the property already has high heat retention storage heaters.
+
+        Both conditions must hold: the main heating description mentions "Electric storage heaters" and the heating
+        controls description matches the high heat retention controls description (compared case-insensitively).
+
+        :return: True if the heaters and the matching controls are both already present, False otherwise.
+        """
+
+        heating_description = self.property.main_heating["clean_description"]
+        has_storage_heaters = "Electric storage heaters" in heating_description
+
+        # Some electric storage heaters will show that the controls are "Manual charge controls" which are indicative
+        # of the old model of electric storage heaters, originating from 1961.
+        # Newer HHR storage heaters will charge up over night but will retain the heat during the day for when warmth
+        # is actually needed, unlike traditional storage heaters that charge up at night and release heat during the
+        # day, which isn't always ideal for the occupants.
+        controls_description = self.property.main_heating_controls["clean_description"].lower()
+        has_hhr_controls = controls_description == self.high_heat_retention_contols_desc.lower()
+
+        return has_storage_heaters and has_hhr_controls
+
    def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only, _return=False):
        """
        We will recommend upgrading to a high heat retention storage system, if the current system is not already
@ -330,19 +373,14 @@ class HeatingRecommender:

        controls_recommender = HeatingControlRecommender(self.property)
        # The heating controls we're recommending for are based on the recommended heating system
-        high_heat_retention_contols_desc = "Controls for high heat retention storage heaters"
+
        # We only recommend Celect-type controls if the current heating system is not Celect-type controls
-        if self.property.main_heating_controls["clean_description"] != high_heat_retention_contols_desc:
+        if self.property.main_heating_controls["clean_description"] != self.high_heat_retention_contols_desc:
            controls_recommender.recommend(heating_description="Electric storage heaters, radiators")

-        # Conditions for not needing this recommendation
-        already_installed_hh_retention = (
-            "Electric storage heaters" in self.property.main_heating["clean_description"] and
-            self.property.main_heating_controls["clean_description"].lower() == high_heat_retention_contols_desc.lower()
-        )
-
+        has_hhr = self.is_hhr_already_installed()
        # Conditions for not recommending electric storage heaters
-        if already_installed_hh_retention:
+        if has_hhr:
            # No recommendation needed
            return

--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@ -1,3 +1,5 @@
+import itertools
+from utils.logger import setup_logger
 from backend.Property import Property
 from recommendations.FloorRecommendations import FloorRecommendations
 from recommendations.WallRecommendations import WallRecommendations
@ -12,13 +14,25 @@ from recommendations.HotwaterRecommendations import HotwaterRecommendations
 from recommendations.SecondaryHeating import SecondaryHeating
 from recommendations.Recommendations import Recommendations

+logger = setup_logger()
+

 class Mds:
    """
    Handles the contruction of the MDS report
    """

-    def __init__(self, property_instance: Property, materials):
+    # Maps each internal measure name to the label used when formatting the selected measures
+    # (see select_optimal_measure_set). Keys must match the measure names used elsewhere in this class.
+    format_map = {
+        "external_wall_insulation": "EWI (Trad Const)",
+        "internal_wall_insulation": "IWI",
+        "cavity_wall_insulation": "CWI",
+        "loft_insulation": "LI",
+        "air_source_heat_pump": "ASHP Htg",
+        "high_heat_retention_storage_heaters": "High Heat Retention Storage Heaters",
+        "solar_pv": "Solar PV",
+    }
+
+    def __init__(self, property_instance: Property, materials, optimise_measures: bool = False):
        self.property_instance = property_instance

        self.floor_recommender = FloorRecommendations(property_instance=property_instance, materials=materials)
@ -35,14 +49,169 @@ class Mds:
        self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
        self.secondary_heating_recommender = SecondaryHeating(property_instance=property_instance)

-    def build(self):
-        if self.property_instance.measures is None:
-            raise NotImplementedError("No measures in the property - implement me")
+        # This flag indicates that we wish to optimise the measures, to the property, depending on the set of measures
+        # we have been provided
+        self.optimise_measures = optimise_measures

-        measures = self.property_instance.measures
+    def select_optimal_measure_set(self, measures):
+        """
+        Build the candidate groups of measures that could be applied to this property.
+
+        Wall, roof and floor measures are mutually exclusive within their own group, so each combination takes one
+        measure from every group that is present. Heating systems are handled the same way as alternative paths.
+        Any measure outside those groups is added to every combination. Each combination is then filtered down to
+        the measures that pass the property-specific validity checks.
+
+        :param measures: Collection of measure names to consider; every entry must be in all_considered_measures
+        :return: A list of combinations, each a list of single-entry dicts mapping the measure name to its
+            format_map label. Combinations left empty after the validity checks are dropped.
+        :raises NotImplementedError: If measures contains a name that is not handled here
+        """
 
-        measure_config_list = [list(m.keys())[0] for m in measures]
+        # This is the set of measures this method currently knows how to handle
+        all_considered_measures = [
+            'external_wall_insulation',
+            'cavity_wall_insulation',
+            'loft_insulation',
+            'air_source_heat_pump',
+            'high_heat_retention_storage_heaters',
+            'solar_pv'
+        ]
 
+        # Check if our measures are within the ones we've handled
+        new = [m for m in measures if m not in all_considered_measures]
+        if new:
+            raise NotImplementedError("New measures - handle me")
+
+        def prune_options(options, measures):
+            # Keep only the requested measures in each group, dropping groups that end up empty
+            options_pruned = []
+            for _group in options:
+                group_pruned = [m for m in _group if m in measures]
+                if not group_pruned:
+                    continue
+                options_pruned.append(group_pruned)
+
+            return options_pruned
+
+        # For options in here, a property could only possibly have one of these
+        one_choice_options = [
+            ["external_wall_insulation", "cavity_wall_insulation", "internal_wall_insulation"],
+            ["loft_insulation", "flat_roof_insulation", "room_in_roof_insulation"],
+            ["solid_floor_insulation", "suspended_floor_insulation"],
+        ]
+        # prune one_choice_options based on the measure set considered for this property
+        one_choice_options_pruned = prune_options(one_choice_options, measures)
+
+        # For options in here, a property could have one or the other so all should be considered
+        multi_path_options = [
+            ["air_source_heat_pump", "high_heat_retention_storage_heaters", "gas_boiler"]
+        ]
+
+        multi_path_options_pruned = prune_options(multi_path_options, measures)
+
+        # Cartesian product: one measure from each remaining group. With no groups left, product() yields a single
+        # empty tuple, so each list below always contains at least one (possibly empty) combination
+        one_choice_combinations = [list(itertools.product(*one_choice_options_pruned))]
+        one_choice_combinations = [list(x) for sublist in one_choice_combinations for x in sublist]
+        multi_path_combinations = [list(itertools.product(*multi_path_options_pruned))]
+        multi_path_combinations = [list(x) for sublist in multi_path_combinations for x in sublist]
+
+        one_choice_flat = [item for sublist in one_choice_options_pruned for item in sublist]
+        multi_path_flat = [item for sublist in multi_path_options_pruned for item in sublist]
+
+        # Measures that belong to neither kind of group (e.g. solar_pv) are included in every combination
+        remaining_measures = [
+            measure for measure in measures
+            if measure not in one_choice_flat and measure not in multi_path_flat
+        ]
+
+        # Combine one_choice and multi_path combinations with remaining measures
+        final_combinations = []
+        for one_choice in one_choice_combinations:
+            for multi_path in multi_path_combinations:
+                final_combinations.append([m for m in one_choice + multi_path + remaining_measures])
+
+        pruned_combinations = []
+        # TODO: We can do these checks once, outside of the loop and prune the combinations
+        for combination in final_combinations:
+            pruned_measures = []
+            for measure in combination:
+                if measure not in measures:
+                    continue
+                # There are certain measures where we need to check that the property is actually suitable
+                if measure == "external_wall_insulation":
+                    # Check if the wall is not cavity since the other wall types can take external wall insulation
+                    if (
+                        self.wall_recommender.ewi_valid() and
+                        not self.property_instance.walls["insulation_thickness"] in ["average", "above average"]
+                    ):
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "cavity_wall_insulation":
+                    # Check if the wall is cavity
+                    if (
+                        self.property_instance.walls['is_cavity_wall'] and
+                        not self.property_instance.walls['is_filled_cavity']
+                    ):
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "loft_insulation":
+                    # Check if the roof is suitable for loft insulation and the loft isn't already done
+                    # Or, if the home had a u-value for the roof, we don't recommend loft insulation
+                    if (
+                        self.property_instance.roof["is_pitched"] and
+                        not self.roof_recommender.is_loft_already_insulated() and
+                        self.property_instance.roof["thermal_transmittance_unit"] is None
+                    ):
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "solid_floor_insulation":
+                    # Check if the floor is solid
+                    # NOTE(review): unlike the loft check above, this requires thermal_transmittance_unit to be set
+                    #               (is not None) - confirm this is intended
+                    if (
+                        self.property_instance.floor["is_solid"] and
+                        self.property_instance.floor["insulation_thickness"] not in ["average", "above average"] and
+                        self.property_instance.floor["thermal_transmittance_unit"] is not None
+                    ):
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "suspended_floor_insulation":
+                    # Check if the floor is suspended
+                    # NOTE(review): same thermal_transmittance_unit condition as the solid floor check - confirm
+                    if (
+                        self.property_instance.floor["is_suspended"] and
+                        self.property_instance.floor["insulation_thickness"] not in ["average", "above average"] and
+                        self.property_instance.floor["thermal_transmittance_unit"] is not None
+                    ):
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "high_heat_retention_storage_heaters":
+
+                    # For the moment, we recommend storage heaters if the property doesn't already have them
+                    # and don't make it contingent on controls
+                    already_has_hhr = self.heating_recommender.is_hhr_already_installed()
+
+                    if (
+                        self.heating_recommender.is_high_heat_retention_valid() and
+                        not already_has_hhr
+                    ):
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "air_source_heat_pump":
+                    if self.heating_recommender.is_ashp_valid():
+                        pruned_measures.append(measure)
+                    continue
+
+                if measure == "solar_pv":
+                    if self.solar_recommender.is_solar_pv_valid():
+                        pruned_measures.append(measure)
+                    continue
+
+                # Reached only by a measure that has no validity check implemented above
+                raise NotImplementedError("Implement me")
+
+            if not pruned_measures:
+                continue
+
+            # Pair each surviving measure with its report label
+            pruned_measures_formatted = []
+            for pm in pruned_measures:
+                pruned_measures_formatted.append({pm: self.format_map[pm]})
+
+            pruned_combinations.append(pruned_measures_formatted)
+
+        # We're left with the subset of measures that are possible for this property
+        # These are the possible groups of measures that could be applied to this home
+
+        return pruned_combinations
+
+    def _build(self, measure_config_list, measures):
        not_implemented_measures = [
            "party_wall_insulation",
            "ground_source_heat_pump",
@ -60,114 +229,164 @@ class Mds:

        mds_recommendations = []
        errors = []
+        phase = 0

        # TODO: Could use a decarator to reduce the boilerplate code - insert_recommendation_id and then the append

        if "external_wall_insulation" in measure_config_list:
-            recs = self.wall_recommender.mds_recommend_ewi(phase=0)
+            recs = self.wall_recommender.mds_recommend_ewi(phase=phase)
            if not recs:
                raise Exception("No recommendations for external wall insulation")
            recs = self.insert_recommendation_id(recs, measures, "external_wall_insulation")
            mds_recommendations.append(recs)
+            if self.optimise_measures and len(recs):
+                phase += 1

        if "cavity_wall_insulation" in measure_config_list:
-            recs = self.wall_recommender.mds_recommend_cavity_wall_insulation(phase=0)
+            recs = self.wall_recommender.mds_recommend_cavity_wall_insulation(phase=phase)
            recs = self.insert_recommendation_id(recs, measures, "cavity_wall_insulation")
            mds_recommendations.append(recs)
+            if self.optimise_measures and len(recs):
+                phase += 1

        if "loft_insulation" in measure_config_list:
            # Check if the roof is suitable for loft insulation
            if self.property_instance.roof['is_roof_room']:
                errors.append("Roof is a room")
            else:
-                recs = self.roof_recommender.mds_loft_insulation(phase=0)
+                recs = self.roof_recommender.mds_loft_insulation(phase=phase)
                if not recs:
                    raise Exception("No recommendations for loft insulation")
                recs = self.insert_recommendation_id(recs, measures, "loft_insulation")
                mds_recommendations.append(recs)
+                if self.optimise_measures and len(recs):
+                    phase += 1

        if "internal_wall_insulation" in measure_config_list:
            raise Exception("check me out 4")
-            self.wall_recommender.recommend(phase=0)
+            self.wall_recommender.recommend(phase=phase)

        if "suspended_floor_insulation" in measure_config_list:
            raise Exception("check me out 5")
-            self.floor_recommender.recommend(phase=0)
+            self.floor_recommender.recommend(phase=phase)

        if "solid_floor_insulation" in measure_config_list:
            raise Exception("check me out 6")
-            self.floor_recommender.recommend(phase=0)
+            self.floor_recommender.recommend(phase=phase)

        if "air_source_heat_pump" in measure_config_list:
            recs = self.heating_recommender.recommend_air_source_heat_pump(
-                phase=0, has_cavity_or_loft_recommendations=False, _return=True
+                phase=phase, has_cavity_or_loft_recommendations=False, _return=True
            )
            recs = self.insert_recommendation_id(recs, measures, "air_source_heat_pump")
            mds_recommendations.append(recs)
+            if self.optimise_measures and len(recs):
+                phase += 1

-        if "electric_storage_heaters" in measure_config_list:
+        if "high_heat_retention_storage_heaters" in measure_config_list:
            recs = self.heating_recommender.recommend_hhr_storage_heaters(
-                phase=0, system_change=True, heating_controls_only=False, _return=True
+                phase=phase, system_change=True, heating_controls_only=False, _return=True
            )
-            recs = self.insert_recommendation_id(recs, measures, "electric_storage_heaters")
-            mds_recommendations.append(recs)
+            if recs is None:
+                logger.info(
+                    f"No recommendations for high heat retention storage heaters, current heating "
+                    f"{self.property_instance.main_heating['clean_description']}"
+                )
+            else:
+                recs = self.insert_recommendation_id(recs, measures, "high_heat_retention_storage_heaters")
+                mds_recommendations.append(recs)
+                if self.optimise_measures and len(recs):
+                    phase += 1

        if "low_energy_lighting" in measure_config_list:
            raise Exception("check me out 9")
-            self.lighting_recommender.recommend(phase=0)
+            self.lighting_recommender.recommend(phase=phase)

        if "cylinder_insulation" in measure_config_list:
            raise Exception("check me out 10")
-            self.hotwater_recommender.recommend(phase=0)
+            self.hotwater_recommender.recommend(phase=phase)

        if "smart_controls" in measure_config_list:
            raise Exception("check me out 11")
-            self.heating_recommender.recommend(phase=0)
+            self.heating_recommender.recommend(phase=phase)

        if "zone_controls" in measure_config_list:
            raise Exception("check me out 12")
-            self.heating_recommender.recommend(phase=0)
+            self.heating_recommender.recommend(phase=phase)

        if "trvs" in measure_config_list:
            raise Exception("check me out 13")
-            self.heating_recommender.recommend(phase=0)
+            self.heating_recommender.recommend(phase=phase)

        if "solar_pv" in measure_config_list:
-            recs = self.solar_recommender.mds_recommend(phase=0, solar_pv_percentage=0.5)
+            recs = self.solar_recommender.mds_recommend(phase=phase, solar_pv_percentage=0.5)
            recs = self.insert_recommendation_id(recs, measures, "solar_pv")
            mds_recommendations.append(recs)
+            if self.optimise_measures and len(recs):
+                phase += 1

        if "double_glazing" in measure_config_list:
            raise Exception("check me out 15")
-            self.windows_recommender.recommend(phase=0)
+            self.windows_recommender.recommend(phase=phase)

        if "mechanical_ventilation" in measure_config_list:
            raise Exception("check me out 16")
-            self.ventilation_recomender.recommend(phase=0)
+            self.ventilation_recomender.recommend(phase=phase)

        if "gas_boiler" in measure_config_list:
            raise Exception("check me out 17")
-            self.heating_recommender.recommend(phase=0)
+            self.heating_recommender.recommend(phase=phase)

        if "flat_roof_insulation" in measure_config_list:
            raise Exception("check me out 18")
-            self.roof_recommender.recommend(phase=0)
+            self.roof_recommender.recommend(phase=phase)

        if "room_in_roof_insulation" in measure_config_list:
            raise Exception("check me out 19")
-            self.roof_recommender.recommend(phase=0)
+            self.roof_recommender.recommend(phase=phase)

        property_representative_recommendations = Recommendations.create_representative_recommendations(
            mds_recommendations, non_invasive_recommendations=[]
        )

-        return property_representative_recommendations, errors
+        return mds_recommendations, property_representative_recommendations, errors
+
+    def build(self):
+        if self.property_instance.measures is None:
+            raise NotImplementedError("No measures in the property - implement me")
+
+        if self.optimise_measures:
+            measures_set = self.select_optimal_measure_set(self.property_instance.measures)
+            mds_recommendations_map = {}
+            representative_recommendations_map = {}
+            errors_map = {}
+            for measures in measures_set:
+                measure_config_list = [list(x.keys())[0] for x in measures]
+                mds_recommendations, rep_recommendations, errors = self._build(
+                    measure_config_list=measure_config_list,
+                    measures=measures
+                )
+                if errors:
+                    logger.info(f"Errors: {errors}")
+
+                mds_recommendations_map[str(measure_config_list)] = mds_recommendations
+                representative_recommendations_map[str(measure_config_list)] = rep_recommendations
+                errors_map[str(measure_config_list)] = errors
+
+            return mds_recommendations_map, representative_recommendations_map, errors_map
+
+        else:
+            measure_config_list = [list(m.keys())[0] for m in self.property_instance.measures]
+            return self._build(measure_config_list=measure_config_list, measures=self.property_instance.measures)

    @staticmethod
    def insert_recommendation_id(recommendations, measures, measure_name):
        # Insert the recommendation identifier into this recommendation
        measure_config = [m for m in measures if measure_name in m][0]
+
+        idx = 0
        for r in recommendations:
-            r["recommendation_id"] = list(measure_config.values())[0]
+            r["recommendation_id"] = list(measure_config.values())[0] + "-" + str(idx)
+            idx += 1

        return recommendations
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@ -227,7 +227,7 @@ class Recommendations:

            recommendations_by_type = sorted(recommendations_by_type, key=lambda x: x["type"])
            representative_recommendations = []
-            for type, recommendations in groupby(recommendations_by_type, key=lambda x: x["type"]):
+            for _type, recommendations in groupby(recommendations_by_type, key=lambda x: x["type"]):
                recommendations = list(recommendations)
                # We also create an efficiency key, which is used to sort the recommendations
                if has_u_value:
@ -311,27 +311,19 @@ class Recommendations:
        # This is the unadjusted resulting heat demand
        predicted_heat_demand_change = starting_heat_demand - expected_heat_demand

-        # We don't want to adjust the heat demand for mechanical ventilation so we add it back on
-
-        # We adjust the heat demand figures to align to the UCL paper
-        current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy_consumption=starting_heat_demand,
-            current_epc_rating=property_instance.data["current-energy-rating"],
-        )
-
        # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
        #       actually implemented
        expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
            epc_energy_consumption=expected_heat_demand,
            current_epc_rating=property_instance.data["current-energy-rating"],
+            total_floor_area=property_instance.floor_area
        )

        adjusted_heat_demand_change = (
-            current_adjusted_energy - expected_adjusted_energy
+            property_instance.current_adjusted_energy - expected_adjusted_energy
        )

        # TODO: We should determine if the home is gas & electricity or just electricity
-        current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
        expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)

        for recommendations_by_type in property_recommendations:
@ -410,8 +402,6 @@ class Recommendations:

        return (
            property_recommendations,
-            current_adjusted_energy,
            expected_adjusted_energy,
-            current_energy_bill,
            expected_energy_bill
        )
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@ -54,6 +54,13 @@ class RoofRecommendations:
            ]
        ]

+        # Extract the insulation thickness from the roof, which is used throughout this class
+        self.insulation_thickness = convert_thickness_to_numeric(
+            self.property.roof["insulation_thickness"],
+            self.property.roof["is_pitched"],
+            self.property.roof["is_flat"]
+        )
+
    def mds_loft_insulation(self, phase):
        """
        For usages within the mds report
@ -62,18 +69,18 @@ class RoofRecommendations:
        """
        self.recommendations = []

-        insulation_thickness = convert_thickness_to_numeric(
-            self.property.roof["insulation_thickness"],
-            self.property.roof["is_pitched"],
-            self.property.roof["is_flat"]
-        )
-
        u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band})

-        self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
+        self.recommend_roof_insulation(u_value, self.insulation_thickness, self.property.roof, phase)

        return self.recommendations

+    def is_loft_already_insulated(self):
+        """
+        Check if the loft is already insulated
+        """
+        return (self.insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]
+
    def recommend(self, phase):

        if self.property.roof["has_dwelling_above"]:
@ -81,21 +88,15 @@ class RoofRecommendations:

        u_value = self.property.roof["thermal_transmittance"]

-        insulation_thickness = convert_thickness_to_numeric(
-            self.property.roof["insulation_thickness"],
-            self.property.roof["is_pitched"],
-            self.property.roof["is_flat"]
-        )
-
        # We check if the roof is already insulated and if so, we exit

        # Building regulations part L recommend installing at least 270mm of insulation, however generally we
        # experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation
        # This only holds true for pitched roofs.
-        if (insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
+        if self.is_loft_already_insulated():
            return

-        if (insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:
+        if (self.insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:
            return

        if self.property.roof["is_roof_room"]:
@ -119,7 +120,7 @@ class RoofRecommendations:
            return

        if self.property.roof["is_pitched"] or self.property.roof["is_flat"]:
-            self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
+            self.recommend_roof_insulation(u_value, self.insulation_thickness, self.property.roof, phase)
            return

        if self.property.roof["is_roof_room"]:
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@ -4,10 +4,13 @@ from recommendations.recommendation_utils import override_costs


 class SolarPvRecommendations:
+    # Solar panel specs based on Eurener 400s solar panels
+    # https://midsummerwholesale.co.uk/buy/eurener/eurener-400w-mepv-zebra-ab-half-cut-mono
    # Approximate area of the solar panels
-    SOLAR_PANEL_AREA = 1.6
+    SOLAR_PANEL_AREA = 1.79
    # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
-    SOLAR_PANEL_WATTAGE = 250
+    # This was previously set to 250w, but has been upped to 400 based on the systems used by Cotswold Energy Group
+    SOLAR_PANEL_WATTAGE = 400

    MAX_SYSTEM_WATTAGE = 6000
    MIN_SYSTEM_WATTAGE = 1000
@ -75,15 +78,7 @@ class SolarPvRecommendations:
            }
        ]

-    def recommend(self, phase):
-        """
-        We check if a property is potentially suitable for solar PV based on the following criteria:
-        - The property is a house or bungalow
-        - The property has a flat or pitched roof
-        - The property does not have existing solar pv
-        :return:
-        """
-
+    def is_solar_pv_valid(self):
        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
        is_valid_roof_type = (
            self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
@ -93,7 +88,18 @@ class SolarPvRecommendations:
            None, 0, self.property.DATA_ANOMALY_MATCHES
        ]

-        if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
+        return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv
+
+    def recommend(self, phase):
+        """
+        We check if a property is potentially suitable for solar PV based on the following criteria:
+        - The property is a house, bungalow or maisonette
+        - The property has a flat roof, a pitched roof or a room in the roof
+        - The property does not have existing solar pv
+        :return:
+        """
+
+        if not self.is_solar_pv_valid():
            return

        solar_pv_percentage = self.property.solar_pv_percentage
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@ -23,7 +23,7 @@ class WallRecommendations(Definitions):
    # After 1930, Solid brick walls became less populate and instead, cavity walls became a
    # more popular choice
    YEARS_CAVITY_WALLS_BEGAN = 1930
-    U_VALUE_UNIT = 'w/m-¦k'
+    U_VALUE_UNIT = "w/m-¦k"

    # part L building regulations indicate that any rennovations on an existing property's walls should
    # achieve a U-value of no higher than 0.3
@ -55,23 +55,26 @@ class WallRecommendations(Definitions):
    NEW_BUILD_INSULATED = 0.75

    # These are the ending descriptions we consider for walls with external insulation
+    # This maps the clean descriptions to the ending descriptions
    EXTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
-        "solid_brick": "Solid brick, with external insulation",
-        "cob": "Cob, with external insulation",
-        "system_built": "System built, with external insulation",
-        "granite_or_whinstone": 'Granite or whinstone, with external insulation',
-        "sandstone_or_limestone": 'Sandstone or limestone, with external insulation',
-        "timber_frame": "Timber frame, with external insulation"
+        "Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
+        "Solid brick, as built, no insulation": "Solid brick, with external insulation",
+        "Solid brick, as built, insulated": "Solid brick, with external insulation",
+        "Cob, as built": "Cob, with external insulation",
+        "System built, as built, no insulation": "System built, with external insulation",
+        "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
+        "Timber frame, as built, no insulation": "Timber frame, with external insulation",
    }

    # These are the ending descriptions we consider for walls with internal insulation
    INTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
-        "solid_brick": "Solid brick, with internal insulation",
-        "cob": "Cob, with internal insulation",
-        "system_built": "System built, with internal insulation",
-        "granite_or_whinstone": 'Granite or whinstone, with internal insulation',
-        "sandstone_or_limestone": 'Sandstone or limestone, with internal insulation',
-        "timber_frame": "Timber frame, with internal insulation"
+        "Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
+        "Solid brick, as built, no insulation": "Solid brick, with internal insulation",
+        "Solid brick, as built, insulated": "Solid brick, with internal insulation",
+        "Cob, as built": "Cob, with internal insulation",
+        "System built, as built, no insulation": "System built, with internal insulation",
+        "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
+        "Timber frame, as built, no insulation": "Timber frame, with internal insulation",
    }

    def __init__(
@ -96,9 +99,10 @@ class WallRecommendations(Definitions):
        ]

        self.internal_wall_non_insulation_materials = [
-            part for part in materials if part["type"] in [
-                "iwi_wall_demolition", "iwi_vapour_barrier", "iwi_redecoration"
-            ]
+            part
+            for part in materials
+            if part["type"]
+               in ["iwi_wall_demolition", "iwi_vapour_barrier", "iwi_redecoration"]
        ]

        self.external_wall_insulation_materials = [
@ -106,12 +110,11 @@ class WallRecommendations(Definitions):
        ]

        self.external_wall_non_insulation_materials = [
-            part for part in materials if part["type"] in [
-                "ewi_wall_demolition", "ewi_wall_preparation", "ewi_wall_redecoration"
-            ]
+            part
+            for part in materials
+            if part["type"] in ["ewi_wall_demolition", "ewi_wall_preparation", "ewi_wall_redecoration"]
        ]

-    @property
    def ewi_valid(self):
        """
        This method check available data, to determine if a property is suitable for external wall insulation
@ -119,7 +122,22 @@ class WallRecommendations(Definitions):

        # Current logic: If the property is in a conservation area/heritage building/listed building or a flat,
        # it is not suitable for EWI
-        if self.property.restricted_measures or (self.property.data["property-type"].lower() == "flat"):
+        if self.property.restricted_measures or (
+            self.property.data["property-type"].lower() == "flat"
+        ) or (
+            self.property.walls['is_cob'] or
+            self.property.walls['is_sandstone_or_limestone'] or
+            self.property.walls["is_cavity_wall"]
+        ):
+            return False
+
+        return True
+
+    def is_suitable_for_solid_insulation(self):
+        """
+        Checks if the wall is of a suitable type for internal/external wall insulation
+        """
+        if self.property.walls["is_cavity_wall"] or self.property.walls["is_cob"]:
            return False

        return True
@ -171,31 +189,43 @@ class WallRecommendations(Definitions):
        # recommend internal wall insulation as a possible measure

        u_value = self.property.walls["thermal_transmittance"]
+        u_value = None if pd.isnull(u_value) else u_value
+
        is_cavity_wall = self.property.walls["is_cavity_wall"]
        insulation_thickness = self.property.walls["insulation_thickness"]

        # We check if the wall is already insulated and if so, we exit
-        if ((insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]) and (
-            "cavity_extract_and_refill" not in self.property.non_invasive_recommendations
+        if (
+            (insulation_thickness in ["average", "above average"])
+            or self.property.walls["is_filled_cavity"]
+        ) and (
+            "cavity_extract_and_refill"
+            not in self.property.non_invasive_recommendations
        ):
            return

        if u_value:

            if self.property.walls["thermal_transmittance_unit"] != self.U_VALUE_UNIT:
-                raise NotImplementedError("Haven't handled the case of other u value units yet")
+                raise NotImplementedError(
+                    "Haven't handled the case of other u value units yet"
+                )

            # If the property is a new build and the U-value is below 0.75, we don't recommend insulation because it's
            # not practical
-            if (self.property.data["transaction-type"] == "new dwelling") and (u_value <= self.NEW_BUILD_INSULATED):
+            if (self.property.data["transaction-type"] == "new dwelling") and (
+                u_value <= self.NEW_BUILD_INSULATED
+            ):
                # Recommend nothing
                return

            # We can't detect it's a cavity wall, but it was built after 1990 so likely built with insulation already
            # + it already has a U-value WORSE than the building regulations, so we recommend either internal or
            # external wall insulation
-            if (not is_cavity_wall) and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION) and (
-                u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE
+            if (
+                (not is_cavity_wall)
+                and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
+                and (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
            ):
                # Recommend insulation
                self.find_insulation(u_value, phase)
@ -203,8 +233,10 @@ class WallRecommendations(Definitions):

            # We can't detect it's a cavity wall, but it was built after 1990 so likely built with insulation already
            # + it already has a U-value better than the building regulations, so we don't need to recommend anything
-            if (not is_cavity_wall) and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION) and (
-                u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE
+            if (
+                (not is_cavity_wall)
+                and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
+                and (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
            ):
                # Recommend nothing
                return
@ -228,7 +260,7 @@ class WallRecommendations(Definitions):
            return

        # Remaining wall types are treated with IWI or EWI
-        if u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
+        if (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and self.is_suitable_for_solid_insulation():
            self.find_insulation(u_value, phase)
            return

@ -267,28 +299,40 @@ class WallRecommendations(Definitions):
        recommendations = []
        for _, material in insulation_materials.iterrows():

-            part_u_value = r_value_per_mm_to_u_value(cavity_width, material["r_value_per_mm"])
+            part_u_value = r_value_per_mm_to_u_value(
+                cavity_width, material["r_value_per_mm"]
+            )

            _, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
            new_u_value = math.ceil(new_u_value * 100.0) / 100.0

            if is_diminishing_returns(
-                recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
+                recommendations,
+                new_u_value,
+                lowest_selected_u_value,
+                self.DIMINISHING_RETURNS_U_VALUE,
            ):
                continue

            if new_u_value <= self.BUILDING_REGULATIONS_PART_L_CAVITY_WALL_MAX_U_VALUE:
-                lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
+                lowest_selected_u_value = update_lowest_selected_u_value(
+                    lowest_selected_u_value, new_u_value
+                )

-                is_extraction_and_refill = "cavity_extract_and_refill" in self.property.non_invasive_recommendations
+                is_extraction_and_refill = (
+                    "cavity_extract_and_refill"
+                    in self.property.non_invasive_recommendations
+                )

                cost_result = self.costs.cavity_wall_insulation(
                    wall_area=self.property.insulation_wall_area,
                    material=material.to_dict(),
-                    is_extraction_and_refill=is_extraction_and_refill
+                    is_extraction_and_refill=is_extraction_and_refill,
                )

-                already_installed = "cavity_wall_insulation" in self.property.already_installed
+                already_installed = (
+                    "cavity_wall_insulation" in self.property.already_installed
+                )
                if already_installed:
                    cost_result = override_costs(cost_result)

@ -302,18 +346,19 @@ class WallRecommendations(Definitions):

                wall_ending_config = WallAttributes("Cavity wall, filled cavity").process()

-                simulation_config = {}
-                if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
-                    simulation_config = {
-                        "walls_energy_eff_ending": "Good",
-                        "walls_thermal_transmittance_ending": new_u_value
-                    }
-
                walls_simulation_config = check_simulation_difference(
                    new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
                )

-                simulation_config = {**simulation_config, **walls_simulation_config}
+                simulation_config = self.set_starting_simulation_config(
+                    wall_ending_config=wall_ending_config
+                )
+
+                simulation_config = {
+                    **simulation_config,
+                    **walls_simulation_config,
+                    "walls_thermal_transmittance_ending": new_u_value
+                }

                recommendations.append(
                    {
@ -323,7 +368,7 @@ class WallRecommendations(Definitions):
                                part=material.to_dict(),
                                quantity=self.property.insulation_wall_area,
                                quantity_unit=QuantityUnits.m2.value,
-                                cost_result=cost_result
+                                cost_result=cost_result,
                            )
                        ],
                        "type": "cavity_wall_insulation",
@ -340,30 +385,35 @@ class WallRecommendations(Definitions):
        self.recommendations = recommendations

    def get_internal_external_wall_description(self, description_map, new_u_value):
-        if self.property.walls["is_solid_brick"]:
-            return description_map["solid_brick"]
-
-        if self.property.walls["is_cob"]:
-            return description_map["cob"]
-
-        if self.property.walls["is_system_built"]:
-            return description_map["system_built"]
-
-        if self.property.walls["is_granite_or_whinstone"]:
-            return description_map["granite_or_whinstone"]
-
-        if self.property.walls["is_sandstone_or_limestone"]:
-            return description_map["sandstone_or_limestone"]
-
-        if self.property.walls["is_timber_frame"]:
-            return description_map["timber_frame"]

        if "Average thermal transmittance" in self.property.walls["clean_description"]:
            if new_u_value is None:
                raise ValueError("New u value is None")
            return f'Average thermal transmittance {new_u_value} W/m-¦K'

-        raise NotImplementedError("Not implemented yet")
+        return description_map[self.property.walls["clean_description"]]
+
+    def set_starting_simulation_config(self, wall_ending_config):
+        """
+        Helper function to set the starting simulation config
+        """
+
+        simulation_config = {}
+        if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
+            simulation_config = {
+                "walls_energy_eff_ending": "Good"
+            }
+
+        # We check if we have double insulation in any instances
+        double_insulation = (
+            (wall_ending_config["is_filled_cavity"] and wall_ending_config["external_insulation"]) or
+            (wall_ending_config["is_filled_cavity"] and wall_ending_config["internal_insulation"]) or
+            (wall_ending_config["external_insulation"] and wall_ending_config["internal_insulation"])
+        )
+        if double_insulation:
+            simulation_config["walls_energy_eff_ending"] = "Very Good"
+
+        return simulation_config

    def _find_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):

@ -373,7 +423,9 @@ class WallRecommendations(Definitions):

            for _, material in insulation_material_group.iterrows():

-                part_u_value = r_value_per_mm_to_u_value(material["depth"], material["r_value_per_mm"])
+                part_u_value = r_value_per_mm_to_u_value(
+                    material["depth"], material["r_value_per_mm"]
+                )
                _, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
                new_u_value = math.ceil(new_u_value * 100.0) / 100.0

@ -384,22 +436,30 @@ class WallRecommendations(Definitions):
                # further into the diminishing returns threshold and can shouldn't be

                if is_diminishing_returns(
-                    recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
+                    recommendations,
+                    new_u_value,
+                    lowest_selected_u_value,
+                    self.DIMINISHING_RETURNS_U_VALUE,
                ):
                    continue

                # We allow a small tolerance for error so we don't discount the recommendation entirely
                if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:

-                    lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
+                    lowest_selected_u_value = update_lowest_selected_u_value(
+                        lowest_selected_u_value, new_u_value
+                    )

                    if material["type"] == "internal_wall_insulation":
                        cost_result = self.costs.internal_wall_insulation(
                            wall_area=self.property.insulation_wall_area,
                            material=material.to_dict(),
-                            non_insulation_materials=non_insulation_materials
+                            non_insulation_materials=non_insulation_materials,
+                        )
+                        already_installed = (
+                            "internal_wall_insulation"
+                            in self.property.already_installed
                        )
-                        already_installed = "internal_wall_insulation" in self.property.already_installed
                        if already_installed:
                            cost_result = override_costs(cost_result)

@ -411,9 +471,12 @@ class WallRecommendations(Definitions):
                        cost_result = self.costs.external_wall_insulation(
                            wall_area=self.property.insulation_wall_area,
                            material=material.to_dict(),
-                            non_insulation_materials=non_insulation_materials
+                            non_insulation_materials=non_insulation_materials,
+                        )
+                        already_installed = (
+                            "external_wall_insulation"
+                            in self.property.already_installed
                        )
-                        already_installed = "external_wall_insulation" in self.property.already_installed
                        if already_installed:
                            cost_result = override_costs(cost_result)

@ -425,16 +488,14 @@ class WallRecommendations(Definitions):

                    wall_ending_config = WallAttributes(new_description).process()

-                    simulation_config = {}
-                    if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
-                        simulation_config = {
-                            "walls_energy_eff_ending": "Good"
-                        }
-
                    walls_simulation_config = check_simulation_difference(
                        new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
                    )

+                    simulation_config = self.set_starting_simulation_config(
+                        wall_ending_config=wall_ending_config
+                    )
+
                    simulation_config = {
                        **walls_simulation_config,
                        **simulation_config,
@ -449,7 +510,7 @@ class WallRecommendations(Definitions):
                                    part=material.to_dict(),
                                    quantity=self.property.insulation_wall_area,
                                    quantity_unit=QuantityUnits.m2.value,
-                                    cost_result=cost_result
+                                    cost_result=cost_result,
                                )
                            ],
                            "type": material["type"],
@ -478,19 +539,21 @@ class WallRecommendations(Definitions):
        # consider diminishing returns between the two as they are considered to be separate measures

        ewi_recommendations = []
-        if self.ewi_valid:
+        if self.ewi_valid():
            ewi_recommendations = self._find_insulation(
                u_value=u_value,
-                insulation_materials=pd.DataFrame(self.external_wall_insulation_materials),
+                insulation_materials=pd.DataFrame(
+                    self.external_wall_insulation_materials
+                ),
                non_insulation_materials=self.external_wall_non_insulation_materials,
-                phase=phase
+                phase=phase,
            )

        iwi_recommendations = self._find_insulation(
            u_value=u_value,
            insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
            non_insulation_materials=self.internal_wall_non_insulation_materials,
-            phase=phase
+            phase=phase,
        )

        self.recommendations += ewi_recommendations + iwi_recommendations
@ -498,12 +561,16 @@ class WallRecommendations(Definitions):
    @staticmethod
    def _make_description(material):
        if material["type"] == "internal_wall_insulation":
-            return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} on internal "
-                    f"walls")
+            return (
+                f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} on internal "
+                f"walls"
+            )

        if material["type"] == "external_wall_insulation":
-            return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} on external "
-                    f"walls")
+            return (
+                f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} on external "
+                f"walls"
+            )

        if material["type"] == "cavity_wall_insulation":
            return f"Fill cavity with {material['description']}"
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@ -14,7 +14,7 @@ class WindowsRecommendations:
        # glazed
        "most": 0.33,
        # If glazing is partial, we assume 50/50 split between glazed and unglazed
-        "partial": 0.5
+        "partial": 0.5,
    }

    def __init__(self, property_instance: Property, materials: List):
@ -52,14 +52,20 @@ class WindowsRecommendations:
        if not number_of_windows:
            raise ValueError("Number of windows not specified")

-        if self.property.windows["has_glazing"] & (self.property.windows["glazing_coverage"] == "full"):
+        if self.property.windows["has_glazing"] & (
+            self.property.windows["glazing_coverage"] == "full"
+        ):
            return

        # We scale the number of windows based on the proportion of existing glazing
        if self.property.data["multi-glaze-proportion"] != "":
-            n_windows_scalar = 1 - (int(self.property.data["multi-glaze-proportion"]) / 100)
+            n_windows_scalar = 1 - (
+                int(self.property.data["multi-glaze-proportion"]) / 100
+            )
        else:
-            n_windows_scalar = self.COVERAGE_MAP.get(self.property.windows["glazing_coverage"], 1)
+            n_windows_scalar = self.COVERAGE_MAP.get(
+                self.property.windows["glazing_coverage"], 1
+            )

        number_of_windows *= n_windows_scalar
        number_of_windows = np.ceil(number_of_windows)
@ -68,7 +74,7 @@ class WindowsRecommendations:
        cost_result = self.costs.window_glazing(
            number_of_windows=number_of_windows,
            material=self.glazing_material,
-            is_secondary_glazing=is_secondary_glazing
+            is_secondary_glazing=is_secondary_glazing,
        )

        already_installed = "windows_glazing" in self.property.already_installed
@ -76,18 +82,26 @@ class WindowsRecommendations:
            cost_result = override_costs(cost_result)
            description = "The property already has double glazing installed. No further action is required."
        else:
-            glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
+            glazing_type = (
+                "secondary glazing" if is_secondary_glazing else "double glazing"
+            )
            if self.property.windows["glazing_coverage"] in ["partial", "most"]:
                description = f"Install {glazing_type} to the remaining windows"
            else:
                description = f"Install {glazing_type} to all windows"

            if self.property.is_listed:
-                description += ". Secondary glazing recommended due to listed building status"
+                description += (
+                    ". Secondary glazing recommended due to listed building status"
+                )
            elif self.property.is_heritage:
-                description += ". Secondary glazing recommended due to herigate building status"
+                description += (
+                    ". Secondary glazing recommended due to herigate building status"
+                )
            elif self.property.in_conservation_area:
-                description += ". Secondary glazing recommended due to conservation area status"
+                description += (
+                    ". Secondary glazing recommended due to conservation area status"
+                )

        self.recommendation = [
            {
@ -100,6 +114,6 @@ class WindowsRecommendations:
                "sap_points": None,
                "already_installed": already_installed,
                **cost_result,
-                "is_secondary_glazing": is_secondary_glazing
+                "is_secondary_glazing": is_secondary_glazing,
            }
        ]
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@ -18,11 +18,10 @@ def prepare_input_measures(property_recommendations, goal):

    input_measures = []
    for recs in property_recommendations:
+
        if recs[0]["type"] == "solar_pv":
-            # if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation.
-            # That will ensure that the optimiser only considers solar recommendations with batteries, so we don't
-            # under-report the potential cost
-            recs = [r for r in recs if r["has_battery"]]
+            # if the recommendation is a solar recommendation with a battery, we exclude it from the optimisation.
+            # We negate with `not` rather than `~`: on a Python bool `~` yields -2 / -1, which are both truthy, so
+            # the filter would keep every recommendation
+            recs = [r for r in recs if not r["has_battery"]]

        input_measures.append(
            [
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@ -673,8 +673,10 @@ def esimtate_pitched_roof_area(floor_area: float, floor_height: float) -> float:


 def estimate_windows(
-    property_type, built_form, construction_age_band, floor_area, number_habitable_rooms, extension_count
+    property_type, built_form, construction_age_band, floor_area, number_habitable_rooms
 ):
+    # If there is an extension, that will boost the number of habitable rooms
+    
    # Base window count based on habitable rooms
    window_count = number_habitable_rooms

@ -717,9 +719,6 @@ def estimate_windows(
        # Older houses with smaller, more numerous windows
        window_count += 1

-    # Adjust for extensions (each extension might add windows)
-    window_count += extension_count
-
    # Adjustments for specific property types
    if property_type in ["Flat", "Maisontte"]:
        # Flats might have fewer windows due to shared walls
@ -756,17 +755,23 @@ def calculate_cavity_age(newest_epc, older_epcs, cleaned):
    return cavity_age


-def check_simulation_difference(old_config, new_config, prefix=""):
+def check_simulation_difference(old_config, new_config, prefix="", keys_with_prefix=None):
    """
    Given two configuration dictionaries (e.g. the wall or heating control attributes), this method will compare
    the two and pick out the entries of new_config whose value differs from old_config. This will be used to
    determine how we should be updating the configuration in the simulation.

    Only the keys of new_config are inspected, so keys that exist solely in old_config are not reported.
    old_config is indexed with every key of new_config.

    :param old_config: the starting configuration
    :param new_config: the ending configuration
    :param prefix: string prepended to the output key for the keys listed in keys_with_prefix
    :param keys_with_prefix: keys whose output name gets the prefix; when None this defaults to
        ["is_assumed", "thermal_transmittance", "insulation_thickness"]
    :return: dictionary mapping "<key>_ending" (or "<prefix><key>_ending") to the new value, for each changed key
    """
+
+    keys_with_prefix = (
+        ["is_assumed", "thermal_transmittance", "insulation_thickness"] if keys_with_prefix is None
+        else keys_with_prefix
+    )
+
    differences = {}
    for key in new_config:
        # Only values that changed between the two configurations are carried into the result
        if old_config[key] != new_config[key]:
-            new_key = prefix + key + "_ending" if key in ["is_assumed", "thermal_transmittance"] else key + "_ending"
+            new_key = prefix + key + "_ending" if key in keys_with_prefix else key + "_ending"
            differences[new_key] = new_config[key]

    return differences
--- a/utils/s3.py
+++ b/utils/s3.py
@ -246,3 +246,33 @@ def read_csv_from_s3(bucket_name, filepath):
    data = list(reader)

    return data
+
+
+def list_files_in_s3_folder(bucket_name, folder_name):
+    """
+    List all files in a given folder in an S3 bucket.
+
+    A single list_objects_v2 call returns at most 1,000 keys, so the listing is paginated to make sure every
+    key under the prefix is returned.
+
+    :param bucket_name: The name of the S3 bucket.
+    :param folder_name: The folder name within the S3 bucket (used as the key prefix).
+    :return: A list of file keys in the specified S3 folder. An empty list is returned when nothing matches
+        or when the listing fails (the failure is logged).
+    """
+    try:
+        s3 = boto3.client('s3')
+        paginator = s3.get_paginator('list_objects_v2')
+
+        file_keys = []
+        for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_name):
+            # Pages without any matching objects carry no 'Contents' entry
+            file_keys.extend(content['Key'] for content in page.get('Contents', []))
+
+        if not file_keys:
+            logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
+
+        return file_keys
+
+    except NoCredentialsError:
+        logger.error("Credentials not available.")
+        return []
+    except PartialCredentialsError:
+        logger.error("Incomplete credentials provided.")
+        return []
+    except Exception as e:
+        # Deliberate best-effort boundary: callers get an empty list rather than an exception
+        logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
+        return []
				`@ -0,0 +1 @@`
				`[{"EPC": "D", "count": 1718}, {"EPC": "C", "count": 1343}, {"EPC": "E", "count": 538}, {"EPC": "F", "count": 80}, {"EPC": "B", "count": 52}, {"EPC": "G", "count": 3}, {"EPC": "A", "count": 2}]`
				`@ -0,0 +1 @@`
				`[{"is_real_epc": true, "count": 3736}, {"is_real_epc": false, "count": 1509}]`