Merge pull request #291 from Hestia-Homes/main

Huge dev deployment
2026-06-08 11:17:27 +00:00 · 2024-04-15 13:41:58 +01:00 · 2024-04-15 13:41:58 +01:00 · c23ad48e1b
commit c23ad48e1b
parent b457df4c63 4cad8e243b
129 changed files with 50560 additions and 3785 deletions
--- a/.gitignore
+++ b/.gitignore
@ -241,6 +241,7 @@ fabric.properties
 # Locally stored data
 local_data/*
 /local_data/*
+etl/epc/local_data/*

 *.DS_Store
 infrastructure/terraform/.terraform*
@ -255,7 +256,7 @@ open_uprn/.idea/
 conservation_areas/.idea/
 model_data/.idea/
 model_data/simulation_system/.idea/
-
+model_data/simulation_system/
 model_data/simulation_system/data*
 model_data/simulation_system/model_directory/
 model_data/simulation_system/predictions/
@ -264,4 +265,7 @@ model_data/simulation_system/predictions/
 .idea/misc.iml

 adhoc
-adhoc/*
+adhoc/*
+
+etl-router-venv/
+refactor_datasets/
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@ -1,3 +1,5 @@
 # Default ignored files
 /shelf/
 /workspace.xml
+# GitHub Copilot persisted chat sessions
+/copilot/chatSessions
--- a/BaseUtility.py
+++ b/BaseUtility.py
@ -45,7 +45,9 @@ class Definitions:
        # contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases.
        "NULL",
        # We sometimes see fields populated with just an empty string.
-        ""
+        "",
+        # An older value which rarely shows up but has been seen in the data.
+        "UNKNOWN",
    }

    DATA_ANOMALY_SUBSTRINGS = {
--- a/backend/DbClient.py
+++ b/backend/DbClient.py
@ -0,0 +1,7 @@
+class DbClient:
+    """
+    This class handles interaction with the database.
+
+    No state is initialised yet; the constructor is intentionally empty.
+    """
+
+    def __init__(self):
+        # Nothing to set up for now
+        return None
--- a/backend/OrdnanceSurvey.py
+++ b/backend/OrdnanceSurvey.py
@ -0,0 +1,105 @@
+from functools import lru_cache
+import urllib.parse
+import requests
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class OrdnanceSuveyClient:
+    """
+    Client for the Ordnance Survey (OS) Places API.
+
+    Given an address and postcode, it queries the "find" endpoint and, from the best match, stores the address
+    and postcode as returned by the OS together with an EPC-compatible property type and built form.
+
+    NOTE: the class name is misspelt ("Suvey"); it is kept because backend/SearchEpc.py imports it by this name.
+    """
+
+    # Seconds to wait for the Places API. Without a timeout, requests waits indefinitely on a stalled connection.
+    REQUEST_TIMEOUT_SECONDS = 30
+
+    # Maps OS classification codes to the property type / built form spelling used when filtering EPC data.
+    # We do not need the full classification table as it includes non-residential properties.
+    CLASSIFICATION_MAP = {
+        # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
+        'RD': {},
+        'RD02': {'property_type': 'House', 'built_form': 'Detached'},
+        'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
+        'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
+        'RD06': {'property_type': 'Flat'},
+    }
+
+    def __init__(self, address, postcode, api_key):
+        """
+        :param address: The address for the property to search for
+        :param postcode: The postcode for the property to search for
+        :param api_key: The Ordnance Survey API key used to authenticate the request
+        """
+
+        self.address = address
+        self.postcode = postcode
+        self.full_address = ", ".join([self.address, self.postcode])
+        self.api_key = api_key
+
+        self.results = None
+
+        self.most_relevant_result = None
+        self.property_type = None
+        self.built_form = None
+        # This will be postcode and address, as returned by the ordnance survey
+        self.address_os = None
+        self.postcode_os = None
+
+        # Status of the first completed lookup. Cached per instance so repeated calls do not hit the API again,
+        # without the instance being pinned in a module-level lru_cache.
+        self._places_status = None
+
+    def set_places_address(self):
+        """
+        Given a response from the places api, this function will set the address and postcode of the property
+
+        :raises ValueError: if no result has been stored yet
+        """
+
+        if self.most_relevant_result is None:
+            raise ValueError("No results found - run get_places_api first")
+
+        address = self.most_relevant_result["ADDRESS"]
+        postcode = self.most_relevant_result["POSTCODE"]
+        # We strip out the postcode from the address as this is already stored separately
+        address = address.replace(postcode, "").strip()
+        # Remove trailing comma
+        address = address.rstrip(",").strip()
+
+        # Address is stored in title case and the postcode in upper case
+        self.address_os = address.title()
+        self.postcode_os = postcode.upper()
+
+    def get_places_api(self):
+        """
+        Query the OS Places API for the full address and store details of the best match.
+
+        The outcome of the first completed call is remembered on the instance and returned for later calls
+        without another request being made.
+
+        :return: dict with a single "status" key holding the HTTP status code of the request
+        :raises ValueError: if no API key was provided
+        """
+
+        if self._places_status is not None:
+            return self._places_status
+
+        if not self.api_key:
+            raise ValueError("Ordnance Survey API key not specified")
+
+        encoded_address_query = urllib.parse.quote(self.full_address)
+        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
+               f"{self.api_key}")
+        response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
+        if response.status_code == 200:
+            # A successful response may carry no "results" entry, so we never index blindly
+            results = response.json().get("results") or []
+            self.results = results
+
+            if results:
+                # Extract some details about the best match
+                self.most_relevant_result = results[0]["DPA"]
+
+                self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
+                self.set_places_address()
+            else:
+                logger.info("Could not find any results for the provided address and postcode")
+
+        else:
+            logger.info(
+                "Could not find any results for the provided address and postcode (status %s)",
+                response.status_code
+            )
+
+        self._places_status = {"status": response.status_code}
+        return self._places_status
+
+    def parse_classification_code(self, classification_code: str):
+        """
+        This function will convert the classification code, returned by the OS places api, to a property type that is
+        compatible with the EPC database.
+
+        The various classifications can be found here:
+        https://osdatahub.os.uk/docs/places/technicalSpecification
+
+        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of classifications
+        For these purposes, we do not need the full classification as this includes non-residential properties. We only
+        parse the ones of interest to us. Unmapped codes leave both attributes as empty strings.
+        :return:
+        """
+
+        mapped = self.CLASSIFICATION_MAP.get(classification_code, {})
+        self.property_type = mapped.get("property_type", "")
+        self.built_form = mapped.get("built_form", "")
--- a/backend/Property.py
+++ b/backend/Property.py
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@ -0,0 +1,744 @@
+import os
+import time
+import re
+
+import usaddress
+import pandas as pd
+import numpy as np
+from epc_api.client import EpcClient
+from backend.OrdnanceSurvey import OrdnanceSuveyClient
+from BaseUtility import Definitions
+from utils.logger import setup_logger
+from typing import List
+from fuzzywuzzy import process
+
+logger = setup_logger()
+
+# Maps each EPC attribute to the datatype used when estimating it for a property without an EPC:
+# "Int64" -> weighted average rounded to an integer, "float" -> weighted average, "str" -> weighted mode.
+# Commented-out attributes are not estimated from neighbouring properties.
+vartypes = {
+    'low-energy-fixed-light-count': "Int64",
+    # 'address': 'str',
+    # 'uprn-source': 'str',
+    'floor-height': 'float',
+    'heating-cost-potential': 'float',
+    'unheated-corridor-length': 'float',
+    'hot-water-cost-potential': 'float',
+    'construction-age-band': 'str',
+    'potential-energy-rating': 'str',
+    'mainheat-energy-eff': 'str',
+    'windows-env-eff': 'str',
+    'lighting-energy-eff': 'str',
+    'environment-impact-potential': "Int64",
+    'glazed-type': 'str',
+    'heating-cost-current': 'float',
+    # 'address3': 'str',
+    'mainheatcont-description': 'str',
+    'sheating-energy-eff': 'str',
+    'property-type': 'str',
+    'local-authority-label': 'str',
+    'fixed-lighting-outlets-count': "Int64",
+    'energy-tariff': 'str',
+    'mechanical-ventilation': 'str',
+    # A cost, like every other "*-cost-*" attribute, so it is averaged rather than taking the most common string
+    'hot-water-cost-current': 'float',
+    'county': 'str',
+    # 'postcode': 'str',
+    'solar-water-heating-flag': 'str',
+    'constituency': 'str',
+    'co2-emissions-potential': 'float',
+    'number-heated-rooms': 'float',
+    'floor-description': 'str',
+    'energy-consumption-potential': 'float',
+    'local-authority': 'str',
+    'built-form': 'str',
+    'number-open-fireplaces': "Int64",
+    'windows-description': 'str',
+    'glazed-area': 'str',
+    # 'inspection-date': str,
+    'mains-gas-flag': 'str',
+    'co2-emiss-curr-per-floor-area': 'float',
+    # 'address1': 'str',
+    'heat-loss-corridor': 'str',
+    'flat-storey-count': "Int64",
+    'constituency-label': 'str',
+    'roof-energy-eff': 'str',
+    'total-floor-area': 'float',
+    'building-reference-number': 'str',
+    'environment-impact-current': 'float',
+    'co2-emissions-current': 'float',
+    'roof-description': 'str',
+    'floor-energy-eff': 'str',
+    'number-habitable-rooms': 'float',
+    # 'address2': 'str',
+    'hot-water-env-eff': 'str',
+    'posttown': 'str',
+    'mainheatc-energy-eff': 'str',
+    'main-fuel': 'str',
+    'lighting-env-eff': 'str',
+    'windows-energy-eff': 'str',
+    'floor-env-eff': 'str',
+    'sheating-env-eff': 'str',
+    'lighting-description': 'str',
+    'roof-env-eff': 'str',
+    'walls-energy-eff': 'str',
+    'photo-supply': 'float',
+    'lighting-cost-potential': 'float',
+    'mainheat-env-eff': 'str',
+    'multi-glaze-proportion': 'float',
+    'main-heating-controls': 'str',
+    # 'lodgement-datetime',
+    'flat-top-storey': 'str',
+    'current-energy-rating': 'str',
+    'secondheat-description': 'str',
+    'walls-env-eff': 'str',
+    'transaction-type': 'str',
+    # 'uprn': "Int64",
+    'current-energy-efficiency': 'float',
+    'energy-consumption-current': 'float',
+    'mainheat-description': 'str',
+    'lighting-cost-current': 'float',
+    # 'lodgement-date',
+    'extension-count': "Int64",
+    'mainheatc-env-eff': 'str',
+    # 'lmk-key': 'str',
+    'wind-turbine-count': "Int64",
+    'tenure': 'str',
+    'floor-level': 'str',
+    'potential-energy-efficiency': "Int64",
+    'hot-water-energy-eff': 'str',
+    'low-energy-lighting': 'float',
+    'walls-description': 'str',
+    'hotwater-description': 'str'
+}
+
+
+class SearchEpc:
+    """
+    Given address information about a home, this class is responsible for retrieving the EPC data associated
+    to the property.
+
+    For a home, we might have address lines 1, 2, 3 and 4, as well as a postcode.
+
+    Often, simply searching the EPC database with address line 1 and postcode will be enough to find
+    the property, but there are some cases where this is not true and we might need to utilise other
+    combinations about the home to find the property
+    """
+
+    # Default number of attempts made by get_epc when the caller does not pass max_retries
+    MAX_RETRIES = 5
+
+    # Response returned by get_epc when the EPC api returned data
+    SUCCESS = {
+        "status": 200,
+        "message": "success",
+        "error": None
+    }
+
+    # NOTE(review): not referenced in the visible part of this file - get_epc builds its own "no data"
+    # response with status 204 rather than returning this 201 one. Confirm which status callers expect.
+    NODATA = {
+        "status": 201,
+        "message": "No data",
+        "error": None
+    }
+
+    def __init__(
+        self,
+        address1: str,
+        postcode: str,
+        auth_token: str,
+        os_api_key: str,
+        full_address: str | None = None,
+        max_retries: int | None = None,
+        uprn: int | None = None,
+        size: int | None = None,
+        property_type: str | None = None,
+        fast: bool = False
+    ):
+        """
+        Address line 1 and postcode are mandatory fields. The other address details are optional
+        but can be used to find the epc for the home, if address1 and postcode are insufficient
+        :param address1: string, property's address line 1
+        :param postcode: string, property's postcode
+        :param auth_token: string, token passed to the EpcClient used to query the EPC api
+        :param os_api_key: string, api key passed to the Ordnance Survey client
+        :param full_address: string, optional parameter, the full address of the property
+        :param max_retries: int, optional, number of retries to make when searching the api. Defaults to
+                        MAX_RETRIES
+        :param uprn: int, optional, the uprn of the property
+        :param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's
+                        default
+        :param property_type: str, optional, the property type of the property, if known before hand
+        :param fast: bool, optional, when True extract_epc_data returns only the newest epc and skips the
+                        address formatting
+        """
+
+        self.address1 = address1
+        self.postcode = postcode
+        self.full_address = full_address
+        self.uprn = uprn
+        # The house number (and its numeric part) is used to weight neighbouring homes when estimating an EPC
+        self.house_number = self.get_house_number(self.address1)
+        self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
+
+        self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES
+
+        self.client = EpcClient(auth_token=auth_token)
+        self.ordnance_survey_client = OrdnanceSuveyClient(
+            address=self.address1, postcode=self.postcode, api_key=os_api_key
+        )
+
+        # Raw response of the most recent successful get_epc call
+        self.data = None
+        self.newest_epc = None
+        self.older_epcs = None
+        self.full_sap_epc = None
+
+        # These are the address and postcode values, which we store in the database
+        self.address_clean = None
+        self.postcode_clean = None
+
+        self.size = size if size is not None else 25
+
+        self.property_type = property_type
+        self.fast = fast
+
+    @classmethod
+    def get_house_number(cls, address: str) -> str | None:
+        """
+        Extract the house number from an address.
+
+        The usaddress library is tried first; when it does not label any token as an address number, a
+        regular expression looks for a number following 'Flat'/'Apartment' or a number at the very start.
+        :return: the house number as a string, or None when no number could be found
+        """
+
+        tagged_tokens = usaddress.parse(address)
+        number_token = next(
+            (token for token, label in tagged_tokens if label == "AddressNumber"), None
+        )
+
+        if number_token is not None:
+            # Remove trailing commas that usaddress keeps attached to the token
+            return number_token.replace(",", "")
+
+        # usaddress isn't optimal for addresses with prefixes such as 'Flat', so fall back to our own pattern:
+        # 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
+        fallback = re.search(r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)', address)
+        if fallback is None:
+            return None
+
+        # Exactly one of the two groups is populated; return whichever matched
+        return next(group for group in fallback.groups() if group is not None)
+
+    @staticmethod
+    def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
+        """
+        Return the first run of digits in the house number as an int (e.g. '12a' -> 12), or None when the
+        house number is missing or contains no digits.
+        """
+        digits = re.search(r'\d+', house_number) if house_number is not None else None
+
+        if digits is None:
+            return None
+
+        return int(digits.group())
+
+    def get_epc(self, params=None, size=None):
+        """
+        Query the EPC api, retrying on errors, and store a non-empty response on self.data.
+
+        :param params: dict, optional, search parameters. Defaults to the uprn when one is set, otherwise to
+                        address line 1 and postcode
+        :param size: int, optional, number of results to request. Defaults to self.size. It is not used for
+                        uprn searches, which go through the client's direct call method
+        :return: dict with "status", "message" and "error" keys. Status is 200 when data was found, 204 when
+                        the api returned nothing and 500 when every attempt raised
+        """
+        size = size if size is not None else self.size
+        if params is None:
+            if self.uprn:
+                params = {"uprn": self.uprn}
+            else:
+                params = {"address": self.address1, "postcode": self.postcode}
+
+        last_error = None
+        for retry in range(self.max_retries):
+            try:
+                if "uprn" in params:
+                    # We use the direct call method inside, since we need to implement uprn as a valid
+                    # parameter for the search function.
+                    # The url is built with plain string handling: os.path.join is for filesystem paths and
+                    # would use a backslash on Windows
+                    url = f"{self.client.domestic.host.rstrip('/')}/search"
+                    response = self.client.domestic.call(method="get", url=url, params=params)
+                else:
+                    response = self.client.domestic.search(params=params, size=size)
+            except Exception as e:
+                last_error = e
+                logger.warning("EPC request failed on attempt %s of %s: %s", retry + 1, self.max_retries, e)
+                if retry < self.max_retries - 1:
+                    # If not the last retry, wait for 3 seconds before retrying
+                    time.sleep(3)
+                continue
+
+            if retry > 0:
+                logger.info("Failed previous attempt but retry successful")
+
+            if response:
+                self.data = response
+                # Return a copy so callers cannot mutate the shared class-level constant
+                return dict(self.SUCCESS)
+
+            # The request worked but nothing was found, so retrying would not help
+            return {
+                "status": 204,
+                "message": "no data",
+                "error": None
+            }
+
+        # Every attempt raised (or no attempt was allowed because max_retries is 0)
+        return {
+            "status": 500,
+            "message": "Could not retrieve EPC data",
+            "error": str(last_error) if last_error is not None else "No attempts were made"
+        }
+
+    @staticmethod
+    def filter_rows(rows, property_type=None, address=None):
+        """
+        Given the results from the EPC api, attempts to reduce the number of rows to a single property.
+
+        Rows are returned untouched when there is nothing to filter (no rows, no criteria, or a single uprn)
+        and whenever a filter would remove every row.
+
+        This method should not be used when property_type and address are both not None; in that case only
+        the property type filter is applied.
+        :param rows: list of EPC rows (dicts) as returned by the api
+        :param property_type: optional, keep only rows whose "property-type" equals this value
+        :param address: optional, keep only rows whose "address" is the closest fuzzy match to this value
+        :return: the filtered list of rows
+        """
+        if not rows:
+            # Nothing to filter; this also avoids fuzzy matching against an empty list of addresses
+            return rows
+
+        if (property_type is None) and (address is None):
+            return rows
+
+        uprns = {r["uprn"] for r in rows}
+        if len(uprns) == 1:
+            return rows
+
+        if property_type is not None:
+            # We can do a filter on the property type
+            rows_filtered = [r for r in rows if r["property-type"] == property_type]
+            return rows_filtered or rows
+
+        # We can do a filter on the address, keeping the rows that share the best matching address
+        best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
+        if best_match is None:
+            return rows
+
+        rows_filtered = [r for r in rows if r["address"] == best_match[0]]
+        return rows_filtered or rows
+
+    @staticmethod
+    def format_address(newest_epc):
+        """
+        Build the address and postcode values stored in the database from an EPC record.
+
+        The postcode is removed from the address, a trailing comma and surrounding whitespace are stripped and
+        the address is title-cased. The postcode is upper-cased.
+        :return: tuple of (address, postcode)
+        """
+        raw_postcode = newest_epc["postcode"]
+        raw_address = newest_epc["address"]
+
+        without_postcode = raw_address.replace(raw_postcode, "").strip()
+        clean_address = without_postcode.rstrip(",").strip().title()
+
+        return clean_address, raw_postcode.upper()
+
+    def extract_epc_data(self, address=None):
+        """
+        Given a successful search, format the stored api response and return it.
+
+        :param address: optional address used to narrow the rows down when several uprns were returned
+        :return: tuple of (newest epc, older epcs, full sap epc, formatted address, formatted postcode, uprn).
+                 In fast mode only the newest epc is populated and the rest are empty placeholders
+        :raises ValueError: if no search has been run, or if the rows cover several uprns and addresses
+        """
+
+        if self.data is None:
+            raise ValueError("data is missing, run search first")
+
+        # We should only have 1 uprn, so if we have multiple we filter down the property further: first on
+        # property type, then on address
+        candidates = self.filter_rows(self.data["rows"], property_type=self.property_type, address=None)
+        candidates = self.filter_rows(candidates, property_type=None, address=address)
+
+        # A "new dwelling" transaction indicates a full sap epc
+        new_dwelling_rows = [row for row in candidates if row["transaction-type"] == "new dwelling"]
+        full_sap_epc = new_dwelling_rows[0] if new_dwelling_rows else {}
+
+        # Split the rows into the newest epc and the rest
+        newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=candidates)
+
+        # Get the uprn for this home; we can sometimes have no uprn for a property
+        known_uprns = {row["uprn"] for row in candidates if row["uprn"]}
+        if (not known_uprns) and len(candidates) > 0:
+            logger.warning("Found data but missing uprn")
+        elif len(known_uprns) != 1:
+            # Multiple UPRNs for a single property is an error, unless every row shares the same address, in
+            # which case we take the uprn from the most recent epc
+            distinct_addresses = {row["address"] for row in candidates}
+            if len(distinct_addresses) != 1:
+                raise ValueError("Multiple UPRNs found - investigate me")
+            known_uprns = {newest_epc["uprn"]}
+
+        uprn = known_uprns.pop() if known_uprns else None
+
+        if self.fast:
+            return newest_epc, [], {}, "", "", None
+
+        # Retrieve postcode and address
+        address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
+
+        return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
+
+    @staticmethod
+    def filter_newest_epc(list_of_epcs: List):
+        """
+        Split a list of EPC records into the most recently lodged one and all the others.
+
+        :param list_of_epcs: list of EPC rows, each with "lodgement-datetime" and "lmk-key" entries
+        :return: tuple of (newest epc, list of older epcs). Returns ({}, []) for an empty list
+        """
+        if not list_of_epcs:
+            return {}, []
+
+        # Computed once up front rather than once per record
+        latest_lodgement = max(epc["lodgement-datetime"] for epc in list_of_epcs)
+
+        # It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs that
+        # were lodged at the exact same time. In this case, we will take the first one
+        newest_epc = next(epc for epc in list_of_epcs if epc["lodgement-datetime"] == latest_lodgement)
+
+        older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_epc["lmk-key"]]
+
+        return newest_epc, older_epcs
+
+    @staticmethod
+    def _get_epc_mode(col: str, epc_data: pd.DataFrame):
+        """
+        Return the single most common value of a column in the EPC data, ignoring missing values.
+        :param col: name of the column to take the mode of
+        :param epc_data: pandas dataframe of epc data
+        :raises NotImplementedError: when the column does not have exactly one mode
+        """
+
+        modes = epc_data[[col]].mode(dropna=True)
+
+        if len(modes) == 1:
+            return modes[col].iloc[0]
+
+        raise NotImplementedError("TODO: Handle multiple modes")
+
+    def fetch_nearby_epcs(
+        self, initial_postcode: str,
+        lmks_to_drop: list[str] | None = None,
+        built_form: str = "",
+        property_type: str = ""
+    ):
+        """
+        Fetches and processes EPC data for a given initial postcode, applying successive trimming
+        to the postcode and filtering the data until a non-empty result set is found.
+
+        The function queries the EPC API with the provided postcode, and if no data is found or
+        if the data doesn't meet certain criteria, it progressively shortens the postcode by
+        removing the last character and retries the query. This process continues until a valid
+        set of EPC data is obtained or the postcode is exhausted.
+
+        Additional filtering is applied to the obtained EPC data based on 'lmk-key', 'built-form',
+        and 'property-type'. The data is also processed to extract and numerically interpret house
+        numbers, calculate house number distances, and apply weights based on these distances.
+
+        :param initial_postcode: The initial full postcode for the EPC data query.
+        :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
+        :param built_form: The 'built-form' value to be used for filtering the EPC data. When empty, the
+                           built form is estimated from the fetched data.
+        :param property_type: The 'property-type' value to be used for filtering the EPC data. When empty,
+                              the property type is estimated from the fetched data.
+        :return: pandas DataFrame holding the newest EPC per uprn for the matching homes, with added
+                 'house_number', 'numeric_house_number' and 'weight' columns
+        :raises Exception: when the postcode has been trimmed to nothing without finding any matching data
+        """
+
+        # Maps the property type spelling used in this code base to the value sent to the api.
+        # A non-empty property_type that is not a key of this map raises a KeyError below.
+        property_type_api_map = {
+            "Bungalow": "bungalow",
+            "Flat": "flat",
+            "House": "house",
+            "Maisonette": "maisonette",
+            "Park home": "park home",
+        }
+
+        postcode = initial_postcode
+        while postcode:
+            # Fetch data from EPC API
+            params = {"postcode": postcode}
+            if property_type:
+                params["property-type"] = property_type_api_map[property_type]
+
+            # We request up to 100 records of the relevant type, so not to pull in too many irrelevant homes
+            epc_response = self.get_epc(params=params, size=100)
+
+            # Any other status (no data, or an error) falls through to trying a shorter postcode
+            if epc_response["status"] == 200:
+                epc_data = pd.DataFrame(self.data["rows"])
+
+                if lmks_to_drop is not None:
+                    epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]
+
+                if not epc_data.empty:
+                    # Further processing of the EPC data
+                    epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], errors='coerce')
+                    # Keep only the most recently lodged EPC for each uprn
+                    epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
+                    epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
+                    epc_data["numeric_house_number"] = epc_data["house_number"].apply(
+                        lambda house_num: self.extract_numeric_housenumber_part(house_num)
+                    )
+
+                    if self.numeric_house_number is None:
+                        # If we don't have a house number, we treat all weights as equal
+                        epc_data["weight"] = 1
+                    else:
+                        epc_data["house_number_distance"] = abs(
+                            epc_data["numeric_house_number"] - self.numeric_house_number
+                        )
+                        # # We add 1, just in case we have a 0 weight (e.g. comparing house number 7a to 7b, or 9A to 9)
+                        # epc_data["weight"] = 1 / (epc_data["house_number_distance"] + 1)
+                        # # If we have a home without a house number, fill that weight with average
+                        # epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
+                        # # Finally, we might not have any house numbers whatsoever so everything could be
+                        # # missing, so we fill with 1
+                        # epc_data["weight"] = epc_data["weight"].fillna(1)
+                        # TODO: Testing
+                        # If the postcode is different from the initial postcode, it doesn't make sense to have
+                        # any weightings
+                        if all(pd.isnull(epc_data["house_number_distance"])) or (postcode != initial_postcode):
+                            epc_data["weight"] = 1
+                        else:
+                            # Closer house numbers get a larger weight; the + 1 avoids dividing by zero when
+                            # the numeric house numbers are equal. Homes without a house number get the
+                            # average weight
+                            epc_data["weight"] = 1 / np.sqrt(epc_data["house_number_distance"] + 1)
+                            epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
+
+                    # Use the caller's property type when given, otherwise the weighted mode of the data
+                    estimation_property_type = self._estimate_str(
+                        key="property-type", estimation_data=epc_data
+                    ) if property_type == "" else property_type
+
+                    epc_built_form = self._estimate_str(
+                        key="built-form",
+                        estimation_data=epc_data[epc_data["property-type"] == estimation_property_type]
+                    )
+
+                    # NOTE(review): the EPC data appears to spell these built forms "End-Terrace" and
+                    # "Mid-Terrace" (no trailing "d") - confirm that the first branch can actually match
+                    if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]:
+                        estimation_built_form = "End-Terraced"
+                    elif (built_form == "") or (pd.isnull(built_form)):
+                        estimation_built_form = epc_built_form
+                    else:
+                        estimation_built_form = built_form
+
+                    # We handle some edge cases experienced with maisonettes - if built form is detached, just filter
+                    # on maisonette
+                    # We also add some additional logic for Park homes, because they are far less common than other
+                    # property types
+
+                    is_maisonette_with_bad_built_form = (estimation_property_type == "Maisonette") & (
+                        estimation_built_form in ["Detached", "Semi-Detached"]
+                    )
+
+                    is_park_home_without_built_form = (estimation_property_type == "Park home") & (
+                        sum(epc_data["built-form"] == estimation_built_form) == 0
+                    )
+
+                    has_missing_built_form = not estimation_built_form
+
+                    # In the edge cases above we filter on property type only, otherwise on both
+                    if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
+                        epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
+                    else:
+                        epc_data = epc_data[
+                            (epc_data["built-form"] == estimation_built_form) & (
+                                epc_data["property-type"] == estimation_property_type)
+                            ]
+
+                    if not epc_data.empty:
+                        return epc_data  # Return the filtered data if it's not empty
+
+            # Shorten the postcode by one character for the next iteration
+            postcode = postcode[:-1].rstrip()
+
+        # If loop finishes without a valid response, raise an exception
+        raise Exception("Unable to find postcode data after trimming - investigate me")
+
+    def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
+        """
+        For a property that does not have an EPC, we retrieve the EPC data for the closest properties
+        and estimate the EPC for the property in question.
+
+        Note - do we have postcodes with just a single address? We would need to use a different approach
+        to find the closest homes
+        :param property_type:   This is the property type of the property we are estimating, that can be retrieved from
+                                the ordnance survey api
+        :param built_form:      This is the built form of the property we are estimating, that can be retrieved from
+                                the ordnance survey api
+        :param lmks_to_drop:    This is a list of LMK keys that should be dropped from the estimation process. This
+                                is used as an override for testing, to drop EPCs for the property we are testing
+        :return:
+        """
+
+        # From the ordnance survey data, we want to determine the property type and then use only similar property
+        # types for the estimation process
+        epc_data = self.fetch_nearby_epcs(
+            initial_postcode=self.postcode,
+            lmks_to_drop=lmks_to_drop,
+            built_form=built_form,
+            property_type=property_type
+        )
+
+        # If we have missing lodgment date, we fill it with inspection-date
+        epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
+        # If we still have missing dates, we set it to the mean of the non NA dates
+        epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
+
+        # For each attribute, we need to determine the datatype and use an appropriate method
+        # to estimate.
+        estimated_epc = {}
+        for key, vartype in vartypes.items():
+            epc_data[key] = np.where(pd.isnull(epc_data[key]), None, epc_data[key])
+            epc_data[key] = np.where(epc_data[key] == "", None, epc_data[key])
+            estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
+            estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
+            estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
+            if vartype == "Int64":
+                # We have some edge cases where we get the error "invalid literal for int() with base 10: '1.0'"
+                # so this handles this
+                estimation_data[key] = estimation_data[key].astype(float).astype(vartype)
+            else:
+                estimation_data[key] = estimation_data[key].astype(vartype)
+
+            if estimation_data.shape[0] == 0:
+                estimated_epc[key] = None
+                continue
+
+            if vartype == "Int64":
+                estimated_value = self._estimate_int(estimation_data, key)
+            elif vartype == "float":
+                estimated_value = self._estimate_float(estimation_data, key)
+            elif vartype == "str":
+                estimated_value = self._estimate_str(estimation_data, key)
+            else:
+                raise NotImplementedError("estimation method not implemented for type")
+
+            estimated_epc[key] = estimated_value
+
+        # Insert an estimated lodgement datetime, with a weighted average
+        estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
+        # Extract lodgement date
+        # It is possible that there is still no lodgement date, so we need to handle this
+        if pd.isnull(estimated_epc["lodgement-datetime"]):
+            estimated_epc["lodgement-date"] = None
+        else:
+            estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
+
+        estimated_epc["postcode"] = self.postcode
+        estimated_epc["uprn"] = self.uprn
+        estimated_epc["address"] = self.full_address
+        # Indicate that this epc was estimated
+        estimated_epc["estimated"] = True
+
+        return estimated_epc
+
+    @staticmethod
+    def calculate_weighted_lodgement_datetime(epc_data):
+        """
+        Compute the weighted mean of the lodgement datetimes of the supplied EPC records.
+
+        :param epc_data: DataFrame with a 'lodgement-datetime' column (anything pd.to_datetime accepts) and a
+                         numeric 'weight' column.
+        :return: The weighted mean as a pandas Timestamp, or pd.NaT when no record has a usable date or the
+                 weights of the dated records sum to zero.
+        """
+        lodgement_datetimes = pd.to_datetime(epc_data['lodgement-datetime'])
+
+        # NaT has no meaningful integer representation (it maps to the int64 minimum), so records without a
+        # date must be dropped before averaging, otherwise they drag the result towards a nonsense date
+        has_date = lodgement_datetimes.notna()
+        if not has_date.any():
+            return pd.NaT
+
+        # Calculate the sum of weights of the records that actually contribute a date
+        weights = epc_data['weight'][has_date]
+        total_weights = weights.sum()
+        if pd.isnull(total_weights) or total_weights == 0:
+            return pd.NaT
+
+        # Represent the dates as integers so they can be averaged numerically
+        # (astype replaces the deprecated Series.view)
+        numeric_dates = lodgement_datetimes[has_date].astype('int64')
+
+        # Calculate the weighted mean in numeric format
+        weighted_mean_numeric = (numeric_dates * weights).sum() / total_weights
+
+        # Convert the numeric weighted mean back to datetime
+        return pd.to_datetime(weighted_mean_numeric)
+
+    @staticmethod
+    def _estimate_int(estimation_data, key):
+        """
+        Estimate an integer attribute as the weighted mean of the column ``key``, using the "weight" column
+        as weights, rounded to the nearest whole number.
+        """
+        values = estimation_data[key]
+        weights = estimation_data["weight"]
+        weighted_mean = np.average(a=values, weights=weights)
+        return round(weighted_mean)
+
+    @staticmethod
+    def _estimate_float(estimation_data, key):
+        """
+        Estimate a float attribute as the weighted mean of the column ``key``, using the "weight" column
+        as weights, rounded to 2 decimal places.
+        """
+        values = estimation_data[key]
+        weights = estimation_data["weight"]
+        weighted_mean = np.average(a=values, weights=weights)
+        return round(weighted_mean, 2)
+
+    @staticmethod
+    def _estimate_str(estimation_data, key):
+        """
+        Estimate a categorical attribute as the value of ``key`` carrying the largest total weight.
+
+        When several values tie on total weight, the value whose records have the most recent mean
+        "lodgement-datetime" is returned.
+        """
+        weight_per_value = estimation_data.groupby(key)["weight"].sum()
+        heaviest = weight_per_value[weight_per_value == weight_per_value.max()]
+
+        if len(heaviest) == 1:
+            return heaviest.index[0]
+
+        # If we have multiple modes, we take the more recent data on average
+        tied_rows = estimation_data[estimation_data[key].isin(heaviest.index)]
+        mean_lodgement = tied_rows.groupby(key)["lodgement-datetime"].mean()
+        return mean_lodgement.idxmax()
+
+    def find_property(self, skip_os=False):
+        """
+        This method will attempt to identify a property. It will, at first, use the EPC api to try and
+        find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to
+        find the UPRN of the address.
+
+        Because no result may have been provided by the EPC api because of formatting issues with the address,
+        if the ordnance survey api is used and the uprn retrieved, the EPC api is queried again with the UPRN, just
+        as a final check to see if there is any EPC data.
+
+        If there is no EPC data, the epc data will be estimated based on the surrounding properties
+
+        :param skip_os: When True, the Ordnance Survey places api is not called. If the EPC api returned nothing,
+                        an EPC is only estimated when the Ordnance Survey client already holds a property type;
+                        otherwise the method returns without setting any EPC data.
+        :raises Exception: If the Ordnance Survey places api does not return a 200 status.
+        """
+
+        # Step 1: use the epc api to find the property and uprn
+        response = self.get_epc()
+
+        if response["status"] == 200:
+            (
+                self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
+            ) = self.extract_epc_data(address=self.full_address)
+            return
+
+        # If the caller asked us not to query the ordnance survey api, the best we can do is estimate from
+        # whatever the ordnance survey client already knows about the property
+        if skip_os:
+            if self.ordnance_survey_client.property_type is not None:
+                # We can try and estimate
+                self._use_estimated_epc()
+            return
+
+        # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn
+        os_response = self.ordnance_survey_client.get_places_api()
+
+        if os_response["status"] != 200:
+            # Investigate this if it happens
+            raise Exception("Unable to find property - investigate me")
+
+        # Step 3: Now that we have a uprn, do another check against the epc api, this time searching with the uprn
+        self.uprn = self.ordnance_survey_client.most_relevant_result["UPRN"]
+        response = self.get_epc()
+        if response["status"] == 200:
+            (
+                self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
+            ) = self.extract_epc_data()
+            return
+
+        # Step 4: If we still don't have an EPC, we estimate the EPC data
+        self.full_address = self.ordnance_survey_client.most_relevant_result["ADDRESS"]
+        self._use_estimated_epc()
+        return
+
+    def _use_estimated_epc(self):
+        """
+        Estimate the EPC from the Ordnance Survey property type and built form, store it as the newest EPC (with
+        no older EPCs and no full SAP EPC) and take the standardised address and postcode from Ordnance Survey.
+        """
+        os_client = self.ordnance_survey_client
+        estimated_epc = self.estimate_epc(
+            property_type=os_client.property_type,
+            built_form=os_client.built_form
+        )
+        self.newest_epc = estimated_epc
+        self.older_epcs = []
+        self.full_sap_epc = {}
+
+        # Finally, set a standardised address 1 and postcode
+        self.address_clean = os_client.address_os
+        self.postcode_clean = os_client.postcode_os
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -8,9 +8,12 @@ class Settings(BaseSettings):
    SECRET_KEY: str
    ENVIRONMENT: str
    DATA_BUCKET: str
-    PREDICTIONS_BUCKET: str
+    SAP_PREDICTIONS_BUCKET: str
+    CARBON_PREDICTIONS_BUCKET: str
+    HEAT_PREDICTIONS_BUCKET: str
    PLAN_TRIGGER_BUCKET: str
    EPC_AUTH_TOKEN: str
+    ORDNANCE_SURVEY_API_KEY: str
    DB_HOST: str
    DB_PASSWORD: str
    DB_USERNAME: str
--- a/backend/app/db/functions/non_intrusive_surveys.py
+++ b/backend/app/db/functions/non_intrusive_surveys.py
@ -0,0 +1,50 @@
+from sqlalchemy.orm import Session
+from backend.app.db.models.non_intrusive_surveys import NonIntrusiveSurvey, NonIntrusiveSurveyNotes
+
+
+def upload_non_intrusive_survey_notes(session: Session, non_invasive_notes, batch_size=500):
+    """
+    Persist non-intrusive surveys and their notes, committing one batch at a time.
+
+    Every dictionary in ``non_invasive_notes`` describes one survey: the 'uprn', 'survey_date' and 'surveyor'
+    entries populate the survey row, and every other key-value pair becomes a note (title, note) attached to
+    that survey.
+
+    :param session: SQLAlchemy Session object through which all database transactions are handled.
+    :param non_invasive_notes: List of survey dictionaries as described above.
+    :param batch_size: The number of surveys written and committed per batch (default is 500).
+    :return: None
+    """
+    survey_fields = ('uprn', 'survey_date', 'surveyor')
+
+    def persist_chunk(chunk):
+        survey_rows = [
+            NonIntrusiveSurvey(
+                uprn=entry['uprn'],
+                survey_date=entry['survey_date'],
+                surveyor=entry['surveyor']
+            )
+            for entry in chunk
+        ]
+
+        session.add_all(survey_rows)
+        # Flushing assigns the survey ids that the notes need as their foreign key
+        session.flush()
+
+        note_rows = [
+            NonIntrusiveSurveyNotes(survey_id=survey_row.id, title=title, note=text)
+            for entry, survey_row in zip(chunk, survey_rows)
+            for title, text in entry.items()
+            if title not in survey_fields
+        ]
+
+        session.bulk_save_objects(note_rows)
+        session.commit()
+
+    # Walk over the input in slices of batch_size; the final slice may be shorter
+    for offset in range(0, len(non_invasive_notes), batch_size):
+        persist_chunk(non_invasive_notes[offset:offset + batch_size])
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@ -3,15 +3,17 @@ from backend.app.db.models.recommendations import Plan, PlanRecommendations, Rec
 from backend.app.db.models.portfolio import Portfolio


-def aggregate_portfolio_recommendations(session, portfolio_id: int):
+def aggregate_portfolio_recommendations(
+    session, portfolio_id: int, total_valuation_increase: float, labour_days: float
+):
    # Aggregate multiple fields
    aggregates = (
        session.query(
            func.sum(Recommendation.estimated_cost).label("cost"),
            func.sum(Recommendation.total_work_hours).label("total_work_hours"),
-            # For future usage we will aggregate multiple fields in this step
-            # func.sum(Recommendation.heat_demand).label("total_heat_demand"),
-            # func.sum(Recommendation.energy_savings).label("total_energy_savings")
+            func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"),
+            func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
+            func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
        )
        .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
        .join(Plan, Plan.id == PlanRecommendations.plan_id)
@ -22,8 +24,9 @@ def aggregate_portfolio_recommendations(session, portfolio_id: int):
    aggregates_dict = {
        "cost": aggregates.cost or 0,
        "total_work_hours": aggregates.total_work_hours or 0,
-        # "total_heat_demand": aggregates.total_heat_demand or 0,
-        # "total_energy_savings": aggregates.total_energy_savings or 0
+        "energy_savings": aggregates.energy_savings or 0,
+        "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
+        "energy_cost_savings": aggregates.energy_cost_savings or 0,
    }

    # Get the portfolio and update the fields
@ -32,6 +35,10 @@ def aggregate_portfolio_recommendations(session, portfolio_id: int):
    for key, value in aggregates_dict.items():
        setattr(portfolio, key, value)

+    # Insert total valuation increase and labour days
+    portfolio.property_valuation_increase = total_valuation_increase
+    portfolio.labour_days = labour_days
+
    # Merge the updated portfolio back into the session
    session.merge(portfolio)
    session.flush()
--- a/backend/app/db/functions/property_functions.py
+++ b/backend/app/db/functions/property_functions.py
@ -3,13 +3,15 @@
 ###
 import datetime
 import pytz
+from sqlalchemy.orm import Session
 from backend.app.db.models.portfolio import (
-    PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel
+    PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel,
+    PropertyDetailsSpatial
 )
 from sqlalchemy.orm.exc import NoResultFound


-def create_property(session, portfolio_id: int, address: str, postcode: str) -> (int, bool):
+def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str) -> (int, bool):
    """
    This function will create a record for the property in the database if it does not exist.
    If it does exist, it will just update the updated_at field.
@ -23,7 +25,7 @@ def create_property(session, portfolio_id: int, address: str, postcode: str) ->
    try:
        # Attempt to fetch the existing property
        existing_property = session.query(PropertyModel).filter_by(
-            address=address, postcode=postcode, portfolio_id=portfolio_id
+            uprn=uprn, portfolio_id=portfolio_id
        ).one()

        # Update the 'updated_at' field
@ -41,6 +43,7 @@ def create_property(session, portfolio_id: int, address: str, postcode: str) ->
            address=address,
            postcode=postcode,
            portfolio_id=portfolio_id,
+            uprn=uprn,
            creation_status=PropertyCreationStatus.LOADING,
            status=PortfolioStatus.ASSESSMENT.value,
            has_pre_condition_report=False,
@ -55,7 +58,9 @@ def create_property(session, portfolio_id: int, address: str, postcode: str) ->
        return new_property.id, True


-def create_property_targets(session, property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None):
+def create_property_targets(
+    session: Session, property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None
+):
    """
    This function will create a record for the property targets in the database if it does not exist.
    :param session: The database session
@ -78,7 +83,9 @@ def create_property_targets(session, property_id: int, portfolio_id: int, epc_ta
    return True


-def update_property_data(session, property_id: int, portfolio_id: int, property_data: dict):
+def update_property_data(
+    session: Session, property_id: int, portfolio_id: int, property_data: dict
+):
    now = datetime.datetime.now(pytz.utc)

    try:
@ -103,7 +110,9 @@ def update_property_data(session, property_id: int, portfolio_id: int, property_
    return True


-def create_property_details_epc(session, property_details_epc: dict):
+def create_property_details_epc(
+    session: Session, property_details_epc: dict
+):
    """
    This function will create or update a record for the property details EPC in the database.
    :param session: The database session
@ -128,3 +137,36 @@ def create_property_details_epc(session, property_details_epc: dict):
    session.flush()

    return True
+
+
+def update_or_create_property_spatial_details(session: Session, uprn: int, property_details_spatial: dict):
+    """
+    Update an existing property details record or create a new one based on the UPRN.
+
+    :param session: The SQLAlchemy session for database interaction.
+    :param uprn: The unique property reference number (UPRN) of the property.
+    :param property_details_spatial: A dictionary containing the spatial property details to store or update.
+    :return: True if the operation is successful, otherwise raises an exception.
+    """
+
+    try:
+        # Attempt to fetch the existing property details. Only the lookup is guarded, so a NoResultFound
+        # raised by anything else is never mistaken for a missing record and turned into an insert
+        existing_property_details = session.query(PropertyDetailsSpatial).filter_by(
+            uprn=uprn
+        ).one()
+    except NoResultFound:
+        # Create a new record if not found
+        new_property_details = PropertyDetailsSpatial(uprn=uprn, **property_details_spatial)
+        session.add(new_property_details)
+        session.flush()
+        return True
+
+    # Update the fields with the data in property_details_spatial
+    for key, value in property_details_spatial.items():
+        setattr(existing_property_details, key, value)
+
+    # Merge the updated property details back into the session and flush
+    session.merge(existing_property_details)
+    session.flush()
+
+    return True
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@ -80,7 +80,13 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
            "starting_u_value": rec.get("starting_u_value"),
            "new_u_value": rec.get("new_u_value"),
            "sap_points": rec["sap_points"],
+            "heat_demand": rec["heat_demand"],
+            "adjusted_heat_demand": rec["adjusted_heat_demand"],
+            "co2_equivalent_savings": rec["co2_equivalent_savings"],
            "total_work_hours": rec["labour_hours"],
+            "energy_cost_savings": rec["energy_cost_savings"],
+            "labour_days": rec["labour_days"],
+            "already_installed": rec["already_installed"],
        }
        for rec in recommendations_to_upload
    ]
--- a/backend/app/db/models/materials.py
+++ b/backend/app/db/models/materials.py
@ -18,6 +18,7 @@ class MaterialType(enum.Enum):
    exposed_floor_insulation = "exposed_floor_insulation"
    flat_roof_insulation = "flat_roof_insulation"
    room_roof_insulation = "room_roof_insulation"
+    windows_glazing = "windows_glazing"

    iwi_wall_demolition = "iwi_wall_demolition"
    iwi_vapour_barrier = "iwi_vapour_barrier"
@ -32,6 +33,10 @@ class MaterialType(enum.Enum):
    ewi_wall_demolition = "ewi_wall_demolition"
    ewi_wall_preparation = "ewi_wall_preparation"
    ewi_wall_redecoration = "ewi_wall_redecoration"
+    low_energy_lighting_installation = "low_energy_lighting_installation"
+    flat_roof_preparation = "flat_roof_preparation"
+    flat_roof_vapour_barrier = "flat_roof_vapour_barrier"
+    flat_roof_waterproofing = "flat_roof_waterproofing"


 class DepthUnit(enum.Enum):
@ -42,6 +47,7 @@ class CostUnit(enum.Enum):
    gbp_sq_meter = "gbp_sq_meter"
    gbp_per_unit = "gbp_per_unit"
    gbp_per_m2 = "gbp_per_m2"
+    gbp_per_m = "gbp_per_m"


 class RValueUnit(enum.Enum):
--- a/backend/app/db/models/non_intrusive_surveys.py
+++ b/backend/app/db/models/non_intrusive_surveys.py
@ -0,0 +1,22 @@
+from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer
+from sqlalchemy.orm import declarative_base
+
+# Declarative base class that the non-intrusive survey models in this module inherit from
+Base = declarative_base()
+
+
+class NonIntrusiveSurvey(Base):
+    """
+    A non-intrusive survey of a property: which property (UPRN), when it was surveyed and by whom.
+    """
+    __tablename__ = 'non_intrusive_survey'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    # UPRNs can be up to 12 digits long, which does not fit in a 32-bit Integer column
+    uprn = Column(BigInteger, nullable=False)
+    survey_date = Column(TIMESTAMP, nullable=False)
+    surveyor = Column(String, nullable=False)
+
+
+class NonIntrusiveSurveyNotes(Base):
+    """
+    A single titled note belonging to a non-intrusive survey.
+    """
+    __tablename__ = 'non_intrusive_survey_notes'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    # The survey (non_intrusive_survey.id) this note belongs to
+    survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False)
+    title = Column(String, nullable=False)
+    # Not nullable, so a note without a value cannot be stored
+    note = Column(String, nullable=False)
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@ -42,6 +42,7 @@ class Portfolio(Base):
    property_valuation_increase = Column(Float)  # Unit is always £ so we don't need to store the unit for the moment
    rental_yield_increase = Column(Float)  # Unit is always £ so we don't need to store the unit for the moment
    total_work_hours = Column(Float)
+    labour_days = Column(Float)
    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
    updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))

@ -85,6 +86,7 @@ class PropertyModel(Base):
    tenure = Column(Text)
    current_epc_rating = Column(Enum(Epc))
    current_sap_points = Column(Float)
+    current_valuation = Column(Float)


 class FeatureRating(enum.Enum):
@ -151,6 +153,21 @@ class PropertyDetailsEpcModel(Base):
    energy_tariff = Column(Text)
    primary_energy_consumption = Column(Float)
    co2_emissions = Column(Float)
+    adjusted_energy_consumption = Column(Float)
+    estimated = Column(Boolean, default=False)
+
+
+class PropertyDetailsSpatial(Base):
+    """
+    Spatial details stored for a property, looked up by UPRN: coordinates plus conservation, listed and
+    heritage building flags.
+    """
+    __tablename__ = "property_details_spatial"
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    # NOTE(review): UPRNs can be up to 12 digits, which would overflow a 32-bit Integer column - confirm
+    # against the database schema and consider BigInteger. There is also no unique constraint declared here.
+    uprn = Column(Integer, nullable=False)
+    # NOTE(review): the coordinate reference system of x/y is not stated here - confirm against the data source
+    x_coordinate = Column(Float)
+    y_coordinate = Column(Float)
+    latitude = Column(Float)
+    longitude = Column(Float)
+    conservation_status = Column(Boolean)
+    is_listed_building = Column(Boolean)
+    is_heritage_building = Column(Boolean)


 class PropertyDetailsMeter(Base):
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@ -22,12 +22,15 @@ class Recommendation(Base):
    new_u_value = Column(Float)
    sap_points = Column(Float)
    heat_demand = Column(Float)
+    adjusted_heat_demand = Column(Float)
    co2_equivalent_savings = Column(Float)
    energy_savings = Column(Float)
    energy_cost_savings = Column(Float)
    property_valuation_increase = Column(Float)
    rental_yield_increase = Column(Float)
    total_work_hours = Column(Float)
+    labour_days = Column(Float)
+    already_installed = Column(Boolean, nullable=False, default=False)


 class RecommendationMaterials(Base):
@ -51,6 +54,9 @@ class Plan(Base):
    property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
    is_default = Column(Boolean, nullable=False)
+    valuation_increase_lower_bound = Column(Float)
+    valuation_increase_upper_bound = Column(Float)
+    valuation_increase_average = Column(Float)


 class PlanRecommendations(Base):
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -1,7 +1,9 @@
 from datetime import datetime

+from tqdm import tqdm
 import pandas as pd
-from epc_api.client import EpcClient
+from etl.epc.Record import EPCRecord
+from backend.SearchEpc import SearchEpc
 from fastapi import APIRouter, Depends
 from sqlalchemy.exc import IntegrityError, OperationalError
 from sqlalchemy.orm import sessionmaker
@ -12,7 +14,8 @@ from backend.app.db.connection import db_engine
 from backend.app.db.functions.materials_functions import get_materials
 from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
 from backend.app.db.functions.property_functions import (
-    create_property, create_property_details_epc, create_property_targets, update_property_data
+    create_property, create_property_details_epc, create_property_targets, update_property_data,
+    update_or_create_property_spatial_details
 )
 from backend.app.db.functions.recommendations_functions import (
    create_plan, create_plan_recommendations, upload_recommendations
@ -20,29 +23,39 @@ from backend.app.db.functions.recommendations_functions import (
 from backend.app.db.models.portfolio import rating_lookup
 from backend.app.dependencies import validate_token
 from backend.app.plan.schemas import PlanTriggerRequest
-from backend.app.plan.utils import (
-    create_recommendation_scoring_data, get_cleaned, insert_temp_recommendation_id
-)
-from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, read_parquet_from_s3
+from backend.app.plan.utils import get_cleaned
+from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc

-from backend.ml_models.sap_change_model.api import SAPChangeModelAPI
+from backend.ml_models.api import ModelApi
 from backend.Property import Property
-from etl.epc.DataProcessor import DataProcessor
-from etl.epc.settings import COLUMNS_TO_MERGE_ON
-from recommendations.FloorRecommendations import FloorRecommendations
-from recommendations.RoofRecommendations import RoofRecommendations
-from recommendations.VentilationRecommendations import VentilationRecommendations
-from recommendations.FireplaceRecommendations import FireplaceRecommendations
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
 from recommendations.optimiser.CostOptimiser import CostOptimiser
 from recommendations.optimiser.GainOptimiser import GainOptimiser
 from recommendations.optimiser.optimiser_functions import prepare_input_measures
-from recommendations.WallRecommendations import WallRecommendations
+from recommendations.Recommendations import Recommendations
 from utils.logger import setup_logger
-from utils.s3 import read_dataframe_from_s3_parquet
+from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
+from backend.ml_models.Valuation import PropertyValuation

 logger = setup_logger()

 BATCH_SIZE = 5
+SCORING_BATCH_SIZE = 400
+
+
+def patch_epc(patch, epc_records):
+    """
+    Overwrite values of the original EPC with data supplied by the customer.
+
+    Only variables that already exist in epc_records["original_epc"] are overwritten; any other entries of the
+    patch are ignored. The EPC records are modified in place.
+
+    :param patch: Dictionary of variable name to replacement value. May be empty.
+    :param epc_records: Dictionary of EPC records containing an "original_epc" entry.
+    :return: The same epc_records object, with the patch applied
+    """
+
+    if not patch:
+        return epc_records
+
+    original_epc = epc_records["original_epc"]
+    for variable in [name for name in patch if name in original_epc]:
+        original_epc[variable] = patch[variable]
+
+    return epc_records
+
+

 router = APIRouter(
    prefix="/plan",
@ -58,31 +71,52 @@ async def trigger_plan(body: PlanTriggerRequest):
    session = sessionmaker(bind=db_engine)()
    created_at = datetime.now().isoformat()

+    # TODO: We should store the trigger file path in the database with the plan so we can track the file that
+    #       triggered the plan
+
+    # TODO: if the measure is already installed, it should actually be the very first phase
+
    try:
        session.begin()
        logger.info("Getting the inputs")
-        epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
-        uprn_filenames = read_dataframe_from_s3_parquet(
-            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
-        )
-        cleaning_data = read_parquet_from_s3(
+        # If we have patches or overrides, we should read them in here
+        patches = []
+        if body.patches_file_path:
+            patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
+
+        already_installed = []
+        if body.already_installed_file_path:
+            already_installed = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
+            )
+
+        cleaning_data = read_dataframe_from_s3_parquet(
            bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
        )

        input_properties = []
-        for config in plan_input:
+        for config in tqdm(plan_input):
            # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
-            # TODO: implment validation. We should also standardise postcode and address in some fashion as
-            #       a postcode of abcdef would be considered different to ABCDEF
+            uprn = config.get("uprn", None)
+            if uprn:
+                uprn = int(float(uprn))
+
+            epc_searcher = SearchEpc(
+                address1=config["address"],
+                postcode=config["postcode"],
+                uprn=uprn,
+                auth_token=get_settings().EPC_AUTH_TOKEN,
+                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
+            )
+            epc_searcher.find_property()
            # Create a record in db
            property_id, is_new = create_property(
-                session, portfolio_id=body.portfolio_id, address=config['address'], postcode=config['postcode']
+                session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
            )
-            # if a new record was not created, we don't produduce recommendations
            if not is_new:
                continue
-            # TODO: Need to add heat demand target
+
            create_property_targets(
                session,
                property_id=property_id,
@ -91,24 +125,41 @@ async def trigger_plan(body: PlanTriggerRequest):
                heat_demand_target=None
            )

+            epc_records = {
+                'original_epc': epc_searcher.newest_epc.copy(),
+                'full_sap_epc': epc_searcher.full_sap_epc.copy(),
+                'old_data': epc_searcher.older_epcs.copy(),
+            }
+
+            patch = next((
+                x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), {})
+            epc_records = patch_epc(patch, epc_records)
+
+            prepared_epc = EPCRecord(
+                epc_records=epc_records,
+                run_mode="newdata",
+                cleaning_data=cleaning_data
+            )
+
+            property_already_installed = next((
+                x for x in already_installed if
+                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), {})
            input_properties.append(
                Property(
-                    postcode=config['postcode'],
-                    address1=config['address'],
-                    epc_client=epc_client,
-                    id=property_id
+                    id=property_id,
+                    address=epc_searcher.address_clean,
+                    postcode=epc_searcher.postcode_clean,
+                    epc_record=prepared_epc,
+                    already_installed=property_already_installed,
+                    **Property.extract_kwargs(config)
                )
            )

        if not input_properties:
            return Response(status_code=204)

-        logger.info("Getting EPC, and spatial data")
-        for p in input_properties:
-            p.search_address_epc()
-            p.set_year_built()
-            p.get_spatial_data(uprn_filenames)
-
        # The materials data could be cached or local so we don't need to make
        # consistent requests to the backend for
        # the same data
@ -116,173 +167,112 @@ async def trigger_plan(body: PlanTriggerRequest):
        materials = get_materials(session)
        cleaned = get_cleaned()

+        uprn_filenames = read_dataframe_from_s3_parquet(
+            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
+        )
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
+
+        logger.info("Getting spatial data")
+        for p in input_properties:
+            p.get_spatial_data(uprn_filenames)
+
        logger.info("Getting components and epc recommendations")
-
-        # TODO: Move this to a class. We probably want a Recommender class which takes the injects the optimisers
-        #      in as a dependency and then the optimisers can take the input measures in as part of the setup() method
-
        recommendations = {}
        recommendations_scoring_data = []
-
-        for p in input_properties:
+        representative_recommendations = {}
+        for p in tqdm(input_properties):

            # Property recommendations
-            p.get_components(cleaned)
+            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

-            property_recommendations = []
-
-            # Floor recommendations
-            floor_recommender = FloorRecommendations(property_instance=p, materials=materials)
-            floor_recommender.recommend()
-
-            if floor_recommender.recommendations:
-                property_recommendations.append(floor_recommender.recommendations)
-
-            # Wall recommendations
-
-            wall_recomender = WallRecommendations(property_instance=p, materials=materials)
-            wall_recomender.recommend()
-
-            if wall_recomender.recommendations:
-                property_recommendations.append(wall_recomender.recommendations)
-
-            # Roof recommendations
-            roof_recommender = RoofRecommendations(property_instance=p, materials=materials)
-            roof_recommender.recommend()
-
-            if roof_recommender.recommendations:
-                property_recommendations.append(roof_recommender.recommendations)
-
-            # Ventilation recommendations
-            ventilation_recomender = VentilationRecommendations(
-                property_instance=p,
-                materials=[part for part in materials if part["type"] == "mechanical_ventilation"]
-            )
-            ventilation_recomender.recommend()
-
-            if ventilation_recomender.recommendation:
-                property_recommendations.append(ventilation_recomender.recommendation)
-
-            # Fireplace sealing recommendations
-            fireplace_recommender = FireplaceRecommendations(property_instance=p)
-            fireplace_recommender.recommend()
-
-            if fireplace_recommender.recommendation:
-                property_recommendations.append(fireplace_recommender.recommendation)
-
-            # We insert temporary ids into the recommendations which is important for the optimiser later
-            property_recommendations = insert_temp_recommendation_id(property_recommendations)
+            recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
+            property_recommendations, property_representative_recommendations = recommender.recommend()

            if not property_recommendations:
                continue

            recommendations[p.id] = property_recommendations
+            representative_recommendations[p.id] = property_representative_recommendations

-            # Finally, we'll prepare data for predicting the impact on SAP
-            data_processor = DataProcessor(None, newdata=True)
-            data_processor.insert_data(pd.DataFrame([p.get_model_data()]))
-            # TODO: Temp
-            if data_processor.data["UPRN"].values[0] == "":
-                data_processor.data["UPRN"] = 0
+            p.create_base_difference_epc_record(cleaned_lookup=cleaned)
+            p.adjust_difference_record_with_recommendations(
+                property_recommendations, property_representative_recommendations
+            )

-            data_processor.pre_process()
+            recommendations_scoring_data.extend(p.recommendations_scoring_data)

-            starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
-            ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
-            fixed_data = data_processor.get_fixed_features()
-
-            # We update the ending record with the recommended updates and we set lodgement date to today
-            ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at)
-
-            for recommendations_by_type in property_recommendations:
-                for i, rec in enumerate(recommendations_by_type):
-                    scoring_dict = create_recommendation_scoring_data(
-                        property=p,
-                        recommendation=rec,
-                        starting_epc_data=starting_epc_data,
-                        ending_epc_data=ending_epc_data,
-                        fixed_data=fixed_data,
-                    )
-
-                    recommendations_scoring_data.append(scoring_dict)
-
-        # cleanup
-        del data_processor
+        # TODO: Make sure that number_habitable_rooms has been dropped

        logger.info("Preparing data for scoring in sap change api")
        recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
-
-        # Perform the same cleaning as in the model - first clean number of room variables though
-        recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
-            data_to_clean=recommendations_scoring_data,
-            cleaning_data=cleaning_data,
-            cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
-            colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+        recommendations_scoring_data = recommendations_scoring_data.drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
        )

-        recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
-            data_to_clean=recommendations_scoring_data,
-            cleaning_data=cleaning_data,
-            cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
-        ).drop(columns=["LOCAL_AUTHORITY"])
+        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)

-        recommendations_scoring_data = DataProcessor.clean_missings_after_description_process(
-            recommendations_scoring_data,
-            ignore_cols=[c for c in recommendations_scoring_data.columns if ("thermal_transmittance" in c) or (
-                "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
-        )
-
-        recommendations_scoring_data = DataProcessor.clean_efficiency_variables(recommendations_scoring_data)
-
-        sap_change_model_api = SAPChangeModelAPI(portfolio_id=body.portfolio_id, timestamp=created_at)
-        file_location = sap_change_model_api.upload_scoring_data(
-            df=recommendations_scoring_data, bucket=get_settings().DATA_BUCKET
-        )
-        response = sap_change_model_api.predict(
-            file_location="s3://{DATA_BUCKET}/".format(DATA_BUCKET=get_settings().DATA_BUCKET) + file_location,
-        )
-
-        # Retrieve the predictions
-        predictions = pd.DataFrame(
-            read_parquet_from_s3(
-                bucket_name=get_settings().PREDICTIONS_BUCKET,
-                file_key=response["storage_filepath"].split(get_settings().PREDICTIONS_BUCKET + "/")[1]
+        all_predictions = {
+            "sap_change_predictions": pd.DataFrame(),
+            "heat_demand_predictions": pd.DataFrame(),
+            "carbon_change_predictions": pd.DataFrame()
+        }
+        to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
+        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+            predictions_dict = model_api.predict_all(
+                df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
+                bucket=get_settings().DATA_BUCKET,
+                prediction_buckets={
+                    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
+                    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
+                    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
+                }
            )
-        )

-        predictions["predictions"] = predictions["predictions"].astype(float).round(1)
-        predictions[['property_id', 'recommendation_id']] = predictions['id'].str.split('+', expand=True)
+            # Append the predictions to the predictions dictionary
+            for key, scored in predictions_dict.items():
+                all_predictions[key] = pd.concat([all_predictions[key], scored])

        # Insert the predictions into the recommendations and run the optimiser
+        # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
+        #       possibility with heating system
+        # TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
+        #      cylinder jacket), we should add these to the recommendations as default
        logger.info("Optimising recommendations")
        for property_id in recommendations.keys():

-            property = [p for p in input_properties if p.id == property_id][0]
-            property_predictions = predictions[predictions["property_id"] == str(property_id)]
+            property_instance = [p for p in input_properties if p.id == property_id][0]

-            for recommendations_by_type in recommendations[property_id]:
-                for rec in recommendations_by_type:
-                    new_sap = property_predictions[property_predictions["recommendation_id"] == str(
-                        rec["recommendation_id"]
-                    )]["predictions"].values[0]
+            recommendations_with_impact, current_adjusted_energy, expected_adjusted_energy = (
+                Recommendations.calculate_recommendation_impact(
+                    property_instance=property_instance,
+                    all_predictions=all_predictions,
+                    recommendations=recommendations
+                )
+            )

-                    rec["sap_points"] = new_sap - float(property.data["current-energy-efficiency"])
+            # Store the resulting adjusted energy in the property instance
+            property_instance.set_adjusted_energy(
+                current_adjusted_energy=current_adjusted_energy,
+                expected_adjusted_energy=expected_adjusted_energy
+            )

-                    if rec["sap_points"] is None:
-                        raise ValueError("Sap points missing")
+            input_measures = prepare_input_measures(recommendations_with_impact, body.goal)

-            input_measures = prepare_input_measures(recommendations[property_id], body.goal)
+            current_sap_points = int(property_instance.data["current-energy-efficiency"])
+            target_sap_points = epc_to_sap_lower_bound(body.goal_value)
+            sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)

            if body.budget:
-                optimiser = GainOptimiser(input_measures, max_cost=body.budget)
+                optimiser = GainOptimiser(
+                    input_measures, max_cost=body.budget, max_gain=sap_gain if sap_gain > 0 else 0
+                )
            else:
                # The minimum gain is the minimum number of SAP points required to get to the target SAP band
-                current_sap_points = int(property.data["current-energy-efficiency"])
-                target_sap_points = epc_to_sap_lower_bound(body.goal_value)
-
                # If the gain is negative, the optimiser will return an empty solution
                optimiser = CostOptimiser(
-                    input_measures, min_gain=target_sap_points - current_sap_points
+                    input_measures,
+                    min_gain=sap_gain
                )

            optimiser.setup()
@ -291,13 +281,26 @@ async def trigger_plan(body: PlanTriggerRequest):

            selected_recommendations = {r["id"] for r in solution}

+            # If wall insulation is selected, we also include mechanical ventilation as a best practice measure
+            if any(x in [r["type"] for r in solution] for x in [
+                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
+            ]):
+                ventilation_rec = next(
+                    (r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
+                    None
+                )
+
+                # If a matching recommendation was found, add its ID to the selected recommendations
+                if ventilation_rec:
+                    selected_recommendations.add(ventilation_rec["recommendation_id"])
+
            # We'll use the set of selected recommendations to filter the recommendations to upload
            final_recommendations = [
                [
                    {**rec, "default": True if rec["recommendation_id"] in selected_recommendations else False}
                    for rec in recommendations_by_type
                ]
-                for recommendations_by_type in recommendations[property_id]
+                for recommendations_by_type in recommendations_with_impact
            ]

            # We'll also unlist the recommendations so they're a bit easier to handle from here onwards
@ -311,6 +314,7 @@ async def trigger_plan(body: PlanTriggerRequest):
        # 3) the recommendations

        logger.info("Uploading recommendations to the database")
+        property_valuation_increases = []
        session.commit()
        for i in range(0, len(input_properties), BATCH_SIZE):
            try:
@ -318,30 +322,43 @@ async def trigger_plan(body: PlanTriggerRequest):
                batch_properties = input_properties[i:i + BATCH_SIZE]

                for p in batch_properties:
+                    recommendations_to_upload = recommendations.get(p.id, [])
+                    default_recommendations = [r for r in recommendations_to_upload if r["default"]]
+                    total_sap_points = sum([r["sap_points"] for r in default_recommendations])
+                    new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points
+                    new_epc = sap_to_epc(new_sap_points)
+
+                    valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
+
                    # Your existing operations
                    property_details_epc = p.get_property_details_epc(
-                        portfolio_id=body.portfolio_id, rating_lookup=rating_lookup
+                        portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
                    )
                    create_property_details_epc(session, property_details_epc)

-                    # TODO: TEMP
-                    if p.data["uprn"] == "":
-                        print("Get rid of me!")
-                        p.data["uprn"] = 0
+                    update_or_create_property_spatial_details(session, p.uprn, p.spatial)

-                    property_data = p.get_full_property_data()
+                    property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
                    update_property_data(
                        session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
                    )

-                    recommendations_to_upload = recommendations.get(p.id, [])
                    if not recommendations_to_upload:
                        continue

                    new_plan_id = create_plan(session, {
                        "portfolio_id": body.portfolio_id,
                        "property_id": p.id,
-                        "is_default": True
+                        "is_default": True,
+                        "valuation_increase_lower_bound": (
+                            valuations["lower_bound_increased_value"] - valuations["current_value"]
+                        ),
+                        "valuation_increase_upper_bound": (
+                            valuations["upper_bound_increased_value"] - valuations["current_value"]
+                        ),
+                        "valuation_increase_average": (
+                            valuations["average_increased_value"] - valuations["current_value"]
+                        ),
                    })

                    uploaded_recommendation_ids = upload_recommendations(session, recommendations_to_upload, p.id)
@ -350,6 +367,10 @@ async def trigger_plan(body: PlanTriggerRequest):
                        session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
                    )

+                    property_valuation_increases.append(
+                        valuations["average_increased_value"] - valuations["current_value"]
+                    )
+
                # Commit the session after each batch
                session.commit()

@ -365,7 +386,21 @@ async def trigger_plan(body: PlanTriggerRequest):
        # way to do this, but it's the simplest and will be a process that we can re-use since when we change a
        # recommendation from being default to not default, we'll need to re-run this process to re-calculate the
        # the portfolion level impact
-        aggregate_portfolio_recommendations(session, portfolio_id=body.portfolio_id)
+
+        total_valuation_increase = sum(property_valuation_increases)
+        # Portfolio labour days is the largest per-property total over that property's default recommendations.
+        # default=0 covers the case where no property produced any recommendations (max() of nothing raises).
+        labour_days = round(max(
+            (sum(r["labour_days"] for r in recs if r["default"]) for recs in recommendations.values()),
+            default=0
+        ))
+
+        aggregate_portfolio_recommendations(
+            session,
+            portfolio_id=body.portfolio_id,
+            total_valuation_increase=total_valuation_increase,
+            labour_days=labour_days
+        )

        # Commit final changes
        session.commit()
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@ -1,10 +1,53 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, conlist, validator
+from typing import Optional


 class PlanTriggerRequest(BaseModel):
-    budget: float | None = None
+    budget: Optional[float] = None
    goal: str
    housing_type: str
    goal_value: str
    portfolio_id: int
    trigger_file_path: str
+    already_installed_file_path: Optional[str] = None
+    patches_file_path: Optional[str] = None
+    exclusions: Optional[conlist(str, min_items=1)] = None
+
+    # Pre-defined list of possibilities for exclusions
+    _allowed_exclusions = {
+        "wall_insulation",
+        "ventilation",
+        "roof_insulation",
+        "floor_insulation",
+        "windows",
+        "fireplace",
+        "heating",
+        "hot_water",
+        "lighting",
+        "solar_pv"
+    }
+
+    _allowed_goals = {"Increase EPC"}
+
+    _allowed_housing_types = {"Social", "Private"}
+
+    # Validator to ensure exclusions are within the pre-defined possibilities
+    @validator('exclusions', each_item=True)
+    def check_exclusions(cls, v):
+        if v not in cls._allowed_exclusions:
+            raise ValueError(f"{v} is not an allowed exclusion")
+        return v
+
+    # Validator to ensure that the goal is within the pre-defined possibilities
+    @validator('goal')
+    def check_goal(cls, v):
+        if v not in cls._allowed_goals:
+            raise ValueError(f"{v} is not a valid goal")
+        return v
+
+    # Validator to ensure that the housing type is within the pre-defined possibilities
+    @validator('housing_type')
+    def check_housing_type(cls, v):
+        if v not in cls._allowed_housing_types:
+            raise ValueError(f"{v} is not a valid housing type")
+        return v
--- a/backend/app/plan/utils.py
+++ b/backend/app/plan/utils.py
@ -8,25 +8,6 @@ from backend.app.config import get_settings
 import msgpack


-def insert_temp_recommendation_id(property_recommendations):
-    """
-    Creates a temporary recommendation id which is needed for
-    filtering recommendations between default and no, after the optimiser has been
-    run
-    :param property_recommendations:  nested list of recommendations, grouped by data_types
-    :return: Updated recommendations_to_upload, where where recommendation has a "recommendation_id"
-             integer inserted
-    """
-    idx = 0
-
-    for recs in property_recommendations:
-        for rec in recs:
-            rec["recommendation_id"] = idx
-            idx += 1
-
-    return property_recommendations
-
-
 def get_cleaned():
    """
    This function will retrieve the cleaned dataset from s3 which has the cleaned
@ -44,145 +25,3 @@ def get_cleaned():
    cleaned = msgpack.unpackb(cleaned, raw=False)

    return cleaned
-
-
-def create_recommendation_scoring_data(
-    property: Property,
-    recommendation: dict,
-    starting_epc_data: pd.DataFrame,
-    ending_epc_data: pd.DataFrame,
-    fixed_data: pd.DataFrame,
-):
-    """
-    This wrapper function prepares data to be passed to the sap model api
-    :return:
-    """
-
-    scoring_dict = {
-        "UPRN": property.data["uprn"],
-        "id": "+".join([str(property.id), str(recommendation["recommendation_id"])]),
-        "LOCAL_AUTHORITY": property.data["local-authority"],
-        **starting_epc_data.to_dict("records")[0],
-        **ending_epc_data.to_dict("records")[0],
-        **fixed_data.to_dict("records")[0]
-    }
-
-    # Set staring u-values if we don't have them
-    if scoring_dict["walls_thermal_transmittance"] is None:
-        scoring_dict["walls_thermal_transmittance"] = get_wall_u_value(
-            clean_description=property.walls["clean_description"],
-            age_band=property.age_band,
-            is_granite_or_whinstone=property.walls["is_granite_or_whinstone"],
-            is_sandstone_or_limestone=property.walls["is_sandstone_or_limestone"]
-        )
-
-    if scoring_dict["floor_thermal_transmittance"] is None:
-        scoring_dict["floor_thermal_transmittance"] = get_floor_u_value(
-            floor_type=property.floor_type,
-            area=property.floor_area,
-            perimeter=property.perimeter,
-            wall_type=property.wall_type,
-            insulation_thickness=property.floor["insulation_thickness"],
-            age_band=property.age_band,
-        )
-
-    if scoring_dict["roof_thermal_transmittance"] is None:
-        scoring_dict["roof_thermal_transmittance"] = get_roof_u_value(
-            insulation_thickness=property.roof["insulation_thickness"],
-            has_dwelling_above=property.roof["has_dwelling_above"],
-            is_loft=property.roof["is_loft"],
-            is_roof_room=property.roof["is_roof_room"],
-            is_thatched=property.roof["is_thatched"],
-            age_band=property.age_band,
-            is_flat=property.roof["is_flat"],
-            is_pitched=property.roof["is_pitched"],
-            is_at_rafters=property.roof["is_at_rafters"],
-        )
-
-    for col in [
-        "walls_insulation_thickness", "floor_insulation_thickness", "roof_insulation_thickness"
-    ]:
-        if scoring_dict[col] is None:
-            scoring_dict[col] = "none"
-
-    # We update the description to indicate it's insulated
-    if recommendation["type"] == "wall_insulation":
-        # The upgrade made here is to the u-value of the walls and the description of the
-        # insulation thickness
-        scoring_dict["walls_thermal_transmittance_ENDING"] = recommendation["new_u_value"]
-        scoring_dict["walls_insulation_thickness_ENDING"] = "above average"
-        scoring_dict["WALLS_ENERGY_EFF_ENDING"] = "Good"
-    else:
-        if scoring_dict["walls_thermal_transmittance_ENDING"] is None:
-            scoring_dict["walls_thermal_transmittance_ENDING"] = get_wall_u_value(
-                clean_description=property.walls["clean_description"],
-                age_band=property.age_band,
-                is_granite_or_whinstone=property.walls["is_granite_or_whinstone"],
-                is_sandstone_or_limestone=property.walls["is_sandstone_or_limestone"]
-            )
-
-        if scoring_dict["walls_insulation_thickness_ENDING"] is None:
-            scoring_dict["walls_insulation_thickness_ENDING"] = "none"
-
-    # Update description to indicate it's insulate
-    if recommendation["type"] == "floor_insulation":
-        if len(recommendation["parts"]) > 1:
-            raise NotImplementedError("Have more than 1 floor insulation part - handle this case")
-
-        scoring_dict["floor_thermal_transmittance_ENDING"] = recommendation["new_u_value"]
-        # We don't really see above average for this in the training data
-        scoring_dict["floor_insulation_thickness_ENDING"] = "average"
-        scoring_dict["FLOOR_ENERGY_EFF_ENDING"] = "Good"
-    else:
-        if scoring_dict["floor_thermal_transmittance_ENDING"] is None:
-            scoring_dict["floor_thermal_transmittance_ENDING"] = get_floor_u_value(
-                floor_type=property.floor_type,
-                area=property.floor_area,
-                perimeter=property.perimeter,
-                wall_type=property.wall_type,
-                insulation_thickness=property.floor["insulation_thickness"],
-                age_band=property.age_band,
-            )
-
-        if scoring_dict["floor_insulation_thickness_ENDING"] is None:
-            scoring_dict["floor_insulation_thickness_ENDING"] = "none"
-
-    if recommendation["type"] == "roof_insulation":
-        scoring_dict["roof_thermal_transmittance_ENDING"] = recommendation["new_u_value"]
-
-        parts = recommendation["parts"]
-        if len(parts) != 1:
-            raise ValueError("More than one part for roof insulation - investiage me")
-
-        scoring_dict["roof_insulation_thickness_ENDING"] = str(int(parts[0]["depth"]))
-        scoring_dict["ROOF_ENERGY_EFF_ENDING"] = "Very Good"
-    else:
-        # Fill missing roof u-values - this fill is not based on recommended upgrades
-        if scoring_dict["roof_thermal_transmittance_ENDING"] is None:
-            scoring_dict["roof_thermal_transmittance_ENDING"] = get_roof_u_value(
-                insulation_thickness=property.roof["insulation_thickness"],
-                has_dwelling_above=property.roof["has_dwelling_above"],
-                is_loft=property.roof["is_loft"],
-                is_roof_room=property.roof["is_roof_room"],
-                is_thatched=property.roof["is_thatched"],
-                age_band=property.age_band,
-                is_flat=property.roof["is_flat"],
-                is_pitched=property.roof["is_pitched"],
-                is_at_rafters=property.roof["is_at_rafters"],
-            )
-
-        if scoring_dict["roof_insulation_thickness_ENDING"] is None:
-            scoring_dict["roof_insulation_thickness_ENDING"] = "none"
-
-    if recommendation["type"] == "mechanical_ventilation":
-        scoring_dict["MECHANICAL_VENTILATION_ENDING"] = 'mechanical, extract only'
-
-    if recommendation["type"] == "sealing_open_fireplace":
-        scoring_dict["NUMBER_OPEN_FIREPLACES_ENDING"] = 0
-
-    if recommendation["type"] not in [
-        "wall_insulation", "floor_insulation", "roof_insulation", "mechanical_ventilation", "sealing_open_fireplace",
-    ]:
-        raise NotImplementedError("Implement me")
-
-    return scoring_dict
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@ -1,10 +1,7 @@
 import boto3
-import csv
-from io import StringIO
 import string
 import secrets
 import logging
-import pandas as pd
 from io import BytesIO


@ -42,25 +39,6 @@ def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
    return logger


-def read_csv_from_s3(bucket_name, filepath):
-    s3 = boto3.client('s3')
-
-    # Get the object from s3
-    s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
-
-    # Read the CSV body from the s3 object
-    body = s3_object['Body'].read()
-
-    # Use StringIO to create a file-like object from the string
-    csv_data = StringIO(body.decode('utf-8'))
-
-    # Use csv library to read it into a list of dictionaries
-    reader = csv.DictReader(csv_data)
-    data = list(reader)
-
-    return data
-
-
 def generate_api_key():
    # Define the characters that will be used to generate the api key
    characters = string.ascii_letters + string.digits
@ -69,15 +47,15 @@ def generate_api_key():
    return api_key


-def sap_to_epc(sap_points: int):
+def sap_to_epc(sap_points: int | float):
    """
    Simple utility function to convert SAP points to EPC rating.
-    :param sapPoints: numerical value of SAP points, typically between 0 and 100
+    :param sap_points: numerical value of SAP points, typically between 0 and 100
    :return:
    """

-    if sap_points <= 0 or sap_points > 100:
-        raise ValueError("SAP points should be between 1 and 100.")
+    if sap_points <= 0:
+        raise ValueError("SAP points should be above 0.")

    if sap_points >= 92:
        return "A"
@ -121,19 +99,6 @@ def epc_to_sap_lower_bound(epc: str):
        raise ValueError("EPC rating should be between A and G")


-def read_parquet_from_s3(bucket_name, file_key):
-    client = boto3.client('s3')
-
-    # Get the object
-    s3_object = client.get_object(Bucket=bucket_name, Key=file_key)
-
-    # Read the CSV body into a DataFrame
-    csv_body = s3_object["Body"].read()
-    df = pd.read_parquet(BytesIO(csv_body))
-
-    return df
-
-
 def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
    """
    Save a pandas DataFrame to S3 as a Parquet file.
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@ -0,0 +1,117 @@
+class AnnualBillSavings:
+    """
+    This is a simple class which will estimate the annual bill savings, based on the kwh savings.
+    This class uses data from Ofgem, including their price caps, to provide us with an estimate for
+    1KWH of energy.
+    """
+
+    # These gas and electricity consumption figures are based on figures presented by Ofgem
+    # https://www.ofgem.gov.uk/information-consumers/energy-advice-households/average-gas-and-electricity-use-explained
+    AVERAGE_ELECTRICITY_CONSUMPTION = 2700
+    AVERAGE_GAS_CONSUMPTION = 11500
+
+    # Latest price cap figures from Ofgem are for April 2024
+    # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
+    ELECTRICITY_PRICE_CAP = 0.245
+    GAS_PRICE_CAP = 0.0604
+
+    # This is a weighted mean of the price caps, using the consumption figures above as weights
+    PRICE_FACTOR = 0.09549999999999999
+
+    # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
+    DAILY_STANDARD_CHARGE_GAS = 0.3143
+    DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
+
+    EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
+
+    @classmethod
+    def estimate(cls, kwh: float):
+        """
+        Estimate the annual bill savings based on the kwh savings
+        :param kwh: The kwh savings
+        :return: An estimate for annual bill savings
+        """
+        return cls.PRICE_FACTOR * kwh
+
+    @classmethod
+    def estimate_electric(cls, kwh: float):
+        """
+        Estimate the annual bill savings based on the kwh savings
+        :param kwh: The kwh savings
+        :return: An estimate for annual bill savings
+        """
+        return cls.ELECTRICITY_PRICE_CAP * kwh
+
+    @classmethod
+    def calculate_annual_bill(cls, kwh):
+        """
+        This method will estimate the total annual bill for a property
+        It assumes gas & electricity are used
+        :param kwh: The total kwh consumption
+        :return: An estimate for annual bill
+        """
+
+        return cls.PRICE_FACTOR * kwh + (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY * 365)
+
+    @classmethod
+    def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
+        """
+        The over-prediction of energy use by EPCs in Great Britain: A comparison
+        of EPC-modelled and metered primary energy use intensity
+
+        Which can be found here: https://www.sciencedirect.com/science/article/pii/S0378778823002542
+        We implement the results on page 10
+
+        :return:
+        """
+
+        gradients = {
+            "A": -0.1,
+            "B": -0.1,
+            "C": -0.43,
+            "D": -0.52,
+            "E": -0.7,
+            "F": -0.76,
+            "G": -0.76
+        }
+
+        intercepts = {
+            "A": 28,
+            "B": 28,
+            "C": 97,
+            "D": 119,
+            "E": 160,
+            "F": 157,
+            "G": 157
+        }
+
+        gradient = gradients[current_epc_rating]
+        intercept = intercepts[current_epc_rating]
+
+        # This should be negative
+        consumption_difference = gradient * epc_energy_consumption + intercept
+
+        adjusted_consumption = (epc_energy_consumption + consumption_difference)
+        if adjusted_consumption < 0:
+            raise ValueError("consumption_difference should be negative")
+
+        return adjusted_consumption
+
+    @classmethod
+    def adjust_expected_band(cls, expected_epc_rating, current_epc_rating):
+        """
+        Because of the differing gradients and intercepts when adjusting, it's possible for
+        expected_adjusted_energy to be bigger than current_adjusted_energy. In this case, we'll
+        adjust against at most 1 EPC band above the current. This function performs the EPC adjustment
+        :param expected_epc_rating: The expected EPC rating
+        :param current_epc_rating: The current EPC rating
+        """
+
+        # Find index of expected EPC rating
+        expected_index = cls.EPC_BANDS.index(expected_epc_rating)
+        current_index = cls.EPC_BANDS.index(current_epc_rating)
+
+        if expected_index - 1 < current_index:
+            return current_epc_rating
+
+        return cls.EPC_BANDS[expected_index - 1]
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@ -0,0 +1,173 @@
+import numpy as np
+
+
+class PropertyValuation:
+    """
+    This is a placeholder class for the property valuation model
+    """
+
+    UPRN_VALUE_LOOKUP = {
+        15038202: 202000,
+        37024763: 213000,
+        100070478545: 212000,
+        100070297696: 662000,  # Based on Zoopla's estimation of nearby house, 8 bloomfield road
+        100070476394: 222000,  # Based on Zoopla's estimation of next door, 20 Parkside
+        100071264896: 128000,
+        # Based on next door neighbour: https://themovemarket.com/tools/propertyprices/flat-2-queens-wood-house-219
+        # -brandwood-road-birmingham-b14-6pu
+        100070533688: 218000,  # Based on Zoopla's estimation of 95 Tenby Road, which is also mid terrace
+        100070505235: 344000,  # Based on Zoopla's estimation of 131 School road, which is also semi-detached
+        100070513306: 182000,  # Based on Zoopla's estimation of 61 Simmons Drive
+        100071306896: 77000,  # Based on Flat 2 of 44 Wedgewood Road on Zoopla
+        100021192109: 650000,  # Based on Zoopla
+        766249482: 358000,  # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached
+        100120703802: 277000,  # Based on Zoopla
+        10014469685: 286000,  # Based on Zoopla
+        10001328782: 196000,  # Based on Zoopla
+        # Urban Splash - valuations from The Move Market
+        10023345430: 74_000,
+        10023345435: 99_000,
+        10023345436: 62_000,
+        10023345441: 62_000,
+        10094183503: 2_988_000,
+        10094183499: 123_000,
+        10070056824: 70_000,
+        110070056242: 100_000,
+        10070056243: 130_000,
+        10070056817: 130_000,
+        10094183501: 185_000,
+        10070056250: 71_000,
+        10094183500: 185_000,
+        10070056843: 67_000,
+        10070056844: 67_000,
+        10070056241: 76_000,
+        10070056834: 63_000,
+        10023345439: 62_000,
+        10070056815: 101_000,
+        10070056816: 101_000,
+        10094183498: 101_000,
+        10070056840: 673_000,
+        10070056848: 76_000,
+        10070056849: 76_000,
+        10070056829: 76_000,
+        10070056920: 76_000,
+        10023345463: 76_000,
+        # IMMO Dudley Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        90070461: 172_000,  # Based on Zoopla
+        90022227: 181_000,  # Based on Zoopla
+        90106884: 180_000,  # Based on Zoopla
+        90051858: 201_000,  # Based on Zoopla
+        90060989: 172_000,  # Based on Zoopla
+        90048026: 196_000,  # Based on Zoopla
+        90077535: 192_000,  # Based on Zoopla
+        90093693: 279_000,  # Based on Zoopla
+        90055152: 149_000,  # Based on Zoopla
+        90028499: 238_000,  # Based on Zoopla
+    }
+
+    # We base our valuation uplifts on a number of sources
+    # https://www.moneysupermarket.com/gas-and-electricity/value-of-efficiency/
+    MSM_MAPPING = [
+        {"start": "G", "end": "F", "increase_percentage": 0.06},
+        {"start": "F", "end": "E", "increase_percentage": 0.01},
+        {"start": "E", "end": "D", "increase_percentage": 0.01},
+        {"start": "D", "end": "C", "increase_percentage": 0.02},
+        {"start": "C", "end": "B", "increase_percentage": 0.04},
+        {"start": "B", "end": "A", "increase_percentage": 0.0},
+    ]
+
+    # https://www.lloydsbankinggroup.com/media/press-releases/2021/halifax/homebuyers-pay-a-green-premium-of-40000
+    # -for-the-most-energy-efficient-properties.html
+    LLOYDS_MAPPING = [
+        {"start": "G", "end": "F", "increase_percentage": 0.038},
+        {"start": "F", "end": "E", "increase_percentage": 0.029},
+        {"start": "E", "end": "D", "increase_percentage": 0.024},
+        {"start": "D", "end": "C", "increase_percentage": 0.02},
+        {"start": "C", "end": "B", "increase_percentage": 0.02},
+        {"start": "B", "end": "A", "increase_percentage": 0.018},
+    ]
+
+    KNIGHT_FRANK_MAPPING = [
+        {"start": "D", "end": "C", "increase_percentage": 0.03},
+        {"start": "D", "end": "B", "increase_percentage": 0.088},
+        {"start": "D", "end": "A", "increase_percentage": 0.088},
+    ]
+
+    NATIONWIDE_MAPPING = [
+        # {"start": "G", "end": "D", "increase_percentage": 0.035},
+        # {"start": "F", "end": "D", "increase_percentage": 0.035},
+        # {"start": "D", "end": "B", "increase_percentage": 0.017},
+        # {"start": "D", "end": "A", "increase_percentage": 0.017},
+    ]
+
+    EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
+
+    @classmethod
+    def get_increase(cls, epc_band_range):
+
+        increases = []
+        for i in range(len(epc_band_range)):
+
+            if i == len(epc_band_range) - 1:
+                break
+
+            current = epc_band_range[i]
+            next = epc_band_range[i + 1]
+
+            msm_increase = [x for x in cls.MSM_MAPPING if x["start"] == current and x["end"] == next][0]
+            lloyds_increase = [x for x in cls.LLOYDS_MAPPING if x["start"] == current and x["end"] == next][0]
+
+            increases.append(
+                {
+                    "start": current,
+                    "end": next,
+                    "msm_increase": msm_increase["increase_percentage"],
+                    "lloyds_increase": lloyds_increase["increase_percentage"],
+                }
+            )
+
+        # We now aggregate the increases. These should be compound increases, so we multiply them together
+        msm_increase = np.prod([1 + x["msm_increase"] for x in increases]) - 1
+        lloyds_increase = np.prod([1 + x["lloyds_increase"] for x in increases]) - 1
+
+        return msm_increase, lloyds_increase
+
+    @classmethod
+    def estimate(cls, property_instance, target_epc):
+        value = cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)
+
+        if not value:
+            return {
+                "current_value": 0,
+                "lower_bound_increased_value": 0,
+                "upper_bound_increased_value": 0,
+                "average_increased_value": 0,
+                "average_increase": 0
+            }
+
+        current_epc = property_instance.data["current-energy-rating"]
+        # We get the spectrum of ratings between the current and target EPC
+        epc_band_range = cls.EPC_BANDS[cls.EPC_BANDS.index(current_epc): cls.EPC_BANDS.index(target_epc) + 1]
+
+        msm_increase, lloyds_increase = cls.get_increase(epc_band_range)
+
+        # We now use the knight frank and nationwide data to get further valuation evidence, if we have it
+        kf_increase = [x for x in cls.KNIGHT_FRANK_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
+        nw_increase = [x for x in cls.NATIONWIDE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
+
+        kf_increase = kf_increase[0]["increase_percentage"] if kf_increase else None
+        nw_increase = nw_increase[0]["increase_percentage"] if nw_increase else None
+
+        all_increases = [x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase] if x is not None]
+
+        max_increase = max(all_increases)
+        min_increase = min(all_increases)
+        avg_increase = np.mean(all_increases)
+
+        return {
+            "current_value": value,
+            "lower_bound_increased_value": value * (1 + min_increase),
+            "upper_bound_increased_value": value * (1 + max_increase),
+            "average_increased_value": value * (1 + avg_increase),
+            "average_increase": value * (1 + avg_increase) - value
+        }
--- a/backend/ml_models/sap_change_model/init.py
+++ b/backend/ml_models/sap_change_model/init.py
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@ -0,0 +1,144 @@
+import pandas as pd
+import requests
+from requests.exceptions import RequestException
+from utils.logger import setup_logger
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+
+logger = setup_logger()
+
+
+class ModelApi:
+    MODEL_PREFIXES = [
+        "sap_change_predictions",
+        "heat_demand_predictions",
+        "carbon_change_predictions"
+    ]
+
+    MODEL_URLS = {
+        "sap_change_predictions": "sapmodel",
+        "heat_demand_predictions": "heatmodel",
+        "carbon_change_predictions": "carbonmodel"
+    }
+
+    def __init__(
+        self,
+        portfolio_id,
+        timestamp,
+        base_url="https://api.dev.hestia.homes",
+    ):
+        """
+        This class handles the communication with the Model APIs. These models include SAP change, heat demand change
+        and carbon change
+
+        property_id (int, optional): :
+        :param portfolio_id: The portfolio ID to be passed in the request payload (required).
+        :param timestamp: The creation timestamp to be passed in the request payload (required).
+        :param base_url:
+        """
+        self.base_url = base_url
+        self.portfolio_id = portfolio_id
+        self.timestamp = timestamp
+
+    def upload_scoring_data(self, df: pd.DataFrame, bucket: str, model_prefix: str) -> str:
+        """
+        The sap model api needs scoring data that is sitting in s3 to use as a dataset to score on
+        This method allows the user to upload a table as a parquet file. This method will return the file
+        location, which can be used as the file location in the predict() method
+
+        :param df:  Pandas dataframe with scoring data to be uploaded to s3
+        :param bucket: Name of the bucket in s3 to upload to
+        :param model_prefix: The model prefix to be used in the file location
+        :return:
+        """
+
+        if model_prefix not in self.MODEL_PREFIXES:
+            raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")
+
+        # Store parquet file in s3 for scoring
+        file_location = f"{model_prefix}/{self.portfolio_id}/{self.timestamp}.parquet"
+
+        logger.info("Storing scoring data to s3")
+        save_dataframe_to_s3_parquet(
+            df=df,
+            bucket_name=bucket,
+            file_key=file_location
+        )
+
+        return file_location
+
+    def predict(self, file_location, model_prefix: str):
+        """Makes a POST request to the Model API selected by model_prefix with the provided parameters.
+
+        Args:
+            file_location (str): The file location to be passed in the request payload.
+            model_prefix (str): The model prefix to be used in the request URL.
+
+        Returns:
+            dict: The API response as a dictionary if the request was successful, None otherwise.
+        """
+        logger.info(f"Making request to {model_prefix} change api")
+        url = f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict"
+        payload = {
+            "file_location": file_location,
+            "property_id": "",  # This should get removed
+            "portfolio_id": self.portfolio_id,
+            "created_at": self.timestamp
+        }
+
+        try:
+            response = requests.post(url, json=payload, headers={"Content-Type": "application/json"}, timeout=120)
+
+            # Check if the response status code is 2xx (success)
+            response.raise_for_status()
+
+            # Return the JSON response as a Python dictionary
+            return response.json()
+        except RequestException as e:
+            logger.error(f"An error occurred: {e}")
+            # In case of an error, you might want to return None or raise the exception
+            # depending on how you want to handle errors in your application
+            return None
+
+    def predict_all(self, df, bucket, prediction_buckets) -> dict:
+
+        """
+        For each model prefix, this method will upload the scoring data to s3 and then make a request to the
+        model api to generate predictions. The predictions will be stored in the predictions bucket.
+        This method will then fetch the stored predictions and format them, returning all of the predictions as
+        a dictionary of pandas dataframes
+        :param df:  Pandas dataframe with scoring data to be uploaded to s3
+        :param bucket: Name of the bucket in s3 to upload to
+        :param prediction_buckets: Dictionary containing the prediction buckets for each model prefix
+        :return:
+        """
+
+        predictions = {}
+        for model_prefix in self.MODEL_PREFIXES:
+            logger.info(f"Scoring for model prefix: {model_prefix}")
+            file_location = self.upload_scoring_data(df, bucket, model_prefix)
+            response = self.predict(
+                "s3://{DATA_BUCKET}/".format(DATA_BUCKET=bucket) + file_location, model_prefix
+            )
+
+            predictions_bucket = prediction_buckets[model_prefix]
+
+            # Retrieve the predictions
+            predictions_df = pd.DataFrame(
+                read_dataframe_from_s3_parquet(
+                    bucket_name=predictions_bucket,
+                    file_key=response["storage_filepath"].split(predictions_bucket + "/")[1]
+                )
+            )
+
+            predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
+            predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
+            # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
+            # string split on phase= and then grab the second element of the resulting list. We could also use a
+            # regular expression to do this but we use the string split method here, for safety.
+            predictions_df['phase'] = predictions_df['recommendation_id'].str.split('phase=').str[1].str[0]
+            # Convert back to int
+            predictions_df['phase'] = predictions_df['phase'].astype(int)
+
+            predictions[model_prefix] = predictions_df
+
+        return predictions
--- a/backend/ml_models/sap_change_model/api.py
+++ b/backend/ml_models/sap_change_model/api.py
@ -1,83 +0,0 @@
-import pandas as pd
-import requests
-from requests.exceptions import RequestException
-from utils.logger import setup_logger
-from utils.s3 import save_dataframe_to_s3_parquet
-
-logger = setup_logger()
-
-
-class SAPChangeModelAPI:
-    def __init__(
-        self,
-        portfolio_id,
-        timestamp,
-        base_url="https://api.dev.hestia.homes",
-    ):
-        """
-        property_id (int, optional): :
-        :param portfolio_id: The portfolio ID to be passed in the request payload. Defaults to 4.
-        :param timestamp: The creation timestamp to be passed in the request payload. Defaults to None.
-        :param base_url:
-        """
-        self.base_url = base_url
-        self.portfolio_id = portfolio_id
-        self.timestamp = timestamp
-
-    def upload_scoring_data(self, df: pd.DataFrame, bucket: str) -> str:
-        """
-        The sap model api needs a scoring data that is sitting in s3 to use as a dataset to score on
-        This method allows the user to upload a table as a parquet file. This method will return the file
-        location, which can be used as the file location in the predict() method
-
-        :param df:  Pandas dataframe with scoring data to be uploaded to s3
-        :param bucket: Name of the bucket in s3 to upload to
-        :return:
-        """
-
-        # Store parquet file in s3 for scoring
-        file_location = "sap_change_predictions/{portfolio_id}/{timestamp}.parquet".format(
-            portfolio_id=self.portfolio_id,
-            timestamp=self.timestamp
-        )
-
-        logger.info("Storing scoring data to s3")
-        save_dataframe_to_s3_parquet(
-            df=df,
-            bucket_name=bucket,
-            file_key=file_location
-        )
-
-        return file_location
-
-    def predict(self, file_location):
-        """Makes a POST request to the SAP Change Model API with the provided parameters.
-
-        Args:
-            file_location (str): The file location to be passed in the request payload.
-
-        Returns:
-            dict: The API response as a dictionary if the request was successful, None otherwise.
-        """
-        logger.info("Making request to sap change api")
-        url = f"{self.base_url}/sapmodel/predict"
-        payload = {
-            "file_location": file_location,
-            "property_id": "",  # This should get removed
-            "portfolio_id": self.portfolio_id,
-            "created_at": self.timestamp
-        }
-
-        try:
-            response = requests.post(url, json=payload, headers={"Content-Type": "application/json"}, timeout=120)
-
-            # Check if the response status code is 2xx (success)
-            response.raise_for_status()
-
-            # Return the JSON response as a Python dictionary
-            return response.json()
-        except RequestException as e:
-            logger.error(f"An error occurred: {e}")
-            # In case of an error, you might want to return None or raise the exception
-            # depending on how you want to handle errors in your application
-            return None
--- a/backend/requirements/base.txt
+++ b/backend/requirements/base.txt
@ -35,4 +35,5 @@ mip==1.15.0
 boto3==1.28.3
 pandas==1.5.3
 pyarrow==12.0.1
-textblob
+textblob
+usaddress==0.5.10
--- a/backend/tests/test_property.py
+++ b/backend/tests/test_property.py
@ -1,21 +1,24 @@
+import pandas as pd
 import pytest
 from unittest.mock import Mock
-from epc_api.client import EpcClient
 from backend.Property import Property
 from etl.epc_clean.EpcClean import EpcClean
+from etl.epc.Record import EPCRecord

 # Define some test data
 mock_epc_response = {
    "rows": [
        {
+            "tenure": "rental (social)",
            "lmk-key": 1,
            "uprn": 1,
            "number-habitable-rooms": 5,
            "property-type": "House",
+            "built-form": "Detached",
            "inspection-date": "2023-06-01",
            'lodgement-datetime': '2023-06-01 20:29:01',
            "some-other-key": "some-value",
-            "roof-description": "Roof Description",
+            "roof-description": "pitched, no insulation",
            "walls-description": "Walls Description",
            "windows-description": "Windows Description",
            "mainheat-description": "Main Heating Description",
@ -35,13 +38,15 @@ mock_epc_response = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        },
        {
            "lmk-key": 2,
            "uprn": 2,
            "number-habitable-rooms": 5,
            "property-type": "House",
+            "built-form": "Detached",
            "inspection-date": "2023-05-01",
            'lodgement-datetime': '2023-05-01 20:29:01',
            "some-other-key": "some-other-value",
@ -65,7 +70,8 @@ mock_epc_response = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        }
    ]
 }
@ -97,7 +103,8 @@ mock_epc_response_dupe = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        },
        {
            "lmk-key": 2,
@ -125,7 +132,8 @@ mock_epc_response_dupe = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        },
        {
            "lmk-key": 3,
@ -153,36 +161,71 @@ mock_epc_response_dupe = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        }
    ]
 }


 class TestProperty:
+
    @pytest.fixture(autouse=True)
-    def property_instance(self, mock_epc_client, mock_cleaner):
-        property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client)
+    def mock_photo_supply_lookup(self):
+        return pd.DataFrame(
+            [
+                dict(
+                    tenure="rental (social)",
+                    built_form="Detached",
+                    property_type="House",
+                    construction_age_band="England and Wales: 1967-1975",
+                    is_flat=False,
+                    is_pitched=True,
+                    is_roof_room=False,
+                    floor_area_decile=2,
+                    photo_supply_median=40
+                )
+            ]
+        )
+
+    @pytest.fixture(autouse=True)
+    def mock_floor_area_decile_thresholds(self):
+        return pd.DataFrame(
+            {"floor_area_decile_thresholds": [0, 10, 30, 50]}
+        )
+
+    @pytest.fixture(autouse=True)
+    def property_instance(self, mock_cleaner):
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = mock_epc_response["rows"][0]
+
+        property_instance = Property(id=1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
+        property_instance.number_of_floors = 2
+        property_instance.number_of_rooms = 5
+        property_instance.floor_area = 100
+        property_instance.floor_height = 2.5
        return property_instance

    @pytest.fixture(autouse=True)
-    def property_instance_dupe_data(self, mock_epc_client_dupe_data):
-        property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data)
+    def property_instance_dupe_data(self):
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = mock_epc_response_dupe["rows"][0]
+        property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address", epc_record=epc_record)
        return property_instance_dupe_data

-    @pytest.fixture
-    def mock_epc_client(self):
-        mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
-        mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
-        mock_epc_client.auth_token = "mocked_auth_token"
-        return mock_epc_client
-
-    @pytest.fixture
-    def mock_epc_client_dupe_data(self):
-        mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
-        mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
-        mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
-        return mock_epc_client_dupe_data
+    # @pytest.fixture
+    # def mock_epc_client(self):
+    #     mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
+    #     mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
+    #     mock_epc_client.auth_token = "mocked_auth_token"
+    #     return mock_epc_client
+    #
+    # @pytest.fixture
+    # def mock_epc_client_dupe_data(self):
+    #     mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
+    #     mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
+    #     mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
+    #     return mock_epc_client_dupe_data

    @pytest.fixture
    def mock_cleaner(self):
@ -221,7 +264,11 @@ class TestProperty:
        }

        mock_cleaner.cleaned = {
-            "roof-description": [{"original_description": "Roof Description"}],
+            "roof-description": [
+                {"original_description": "Roof Description"},
+                {"original_description": "pitched, no insulation", "is_pitched": True, "is_flat": False,
+                 "is_roof_room": False}
+            ],
            "walls-description": [walls_data],
            "windows-description": [{"original_description": "Windows Description"}],
            "mainheat-description": [{"original_description": "Main Heating Description"}],
@ -232,37 +279,34 @@ class TestProperty:
        }
        return mock_cleaner

-    def test_init(self, mock_epc_client):
-        inst1 = Property(0, "AB12CD", "Test Address", epc_client=mock_epc_client)
-        # Should be mocked auth token
-        assert inst1.epc_client.auth_token == "mocked_auth_token"
+    def test_init(self):
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = {"uprn": 1}
+        inst1 = Property(0, postcode="AB12CD", address="Test Address", epc_record=epc_record)

-        inst2 = Property(3, "AB12CD", "Test Address", epc_client=mock_epc_client)
-        assert inst2.epc_client.auth_token
+        assert inst1.data is not None

-        inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data"}, epc_client=mock_epc_client)
-        assert inst3.data == {"some": "data"}
+        inst2 = Property(3, "AB12CD", "Test Address", epc_record=epc_record)
+        assert inst2.id == 3

-        data = inst3.search_address_epc()
-        assert data is None
+        inst3 = Property(4, "AB12CD", "Test Address", epc_record=epc_record)
+        assert inst3.data == {"uprn": 1}

-    def test_search_address_epc(self, property_instance):
-        # Call the method to test
-        property_instance.search_address_epc()
-
-        # Verify that the correct data is being returned
-        assert property_instance.data == mock_epc_response["rows"][0]
-
-    def test_search_address_epc_multiple_results(self, property_instance_dupe_data, mock_epc_client_dupe_data):
-        with pytest.raises(Exception, match="More than one result found for this address - investigate me"):
-            property_instance_dupe_data.search_address_epc()
-
-    def test_get_components(self, property_instance, mock_cleaner, mock_epc_client):
-        property_instance.search_address_epc()
-        property_instance.get_components(mock_cleaner.cleaned)
+    def test_get_components(
+        self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+    ):
+        property_instance.get_components(
+            mock_cleaner.cleaned,
+            photo_supply_lookup=mock_photo_supply_lookup,
+            floor_area_decile_thresholds=mock_floor_area_decile_thresholds
+        )

        # Verify that the components are set correctly
-        assert property_instance.roof == {"original_description": "Roof Description"}
+        assert property_instance.roof == {
+            'original_description': 'pitched, no insulation', 'is_pitched': True,
+            'is_flat': False, 'is_roof_room': False
+        }
+
        assert property_instance.walls == {
            "original_description": "Walls Description",
            "is_cavity_wall": True,
@ -286,24 +330,15 @@ class TestProperty:

        # Verify that ValueError is raised when EpcClean doesn't contain cleaned data
        with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"):
-            property_instance.get_components(mock_cleaner.cleaned)
+            property_instance.get_components(mock_cleaner.cleaned, pd.DataFrame(), pd.DataFrame())

-    def test_get_components_no_data(self, property_instance, mock_cleaner):
+    def test_get_components_no_attributes(
+        self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+    ):
        # Modify the mock cleaner to have no attributes for a specific description
        mock_cleaner.cleaned = {
            "roof-description": []
        }
-
-        # Verify that ValueError is raised when no attributes are found
-        with pytest.raises(ValueError, match="Property does not contain data"):
-            property_instance.get_components(mock_cleaner.cleaned)
-
-    def test_get_components_no_attributes(self, property_instance, mock_cleaner):
-        # Modify the mock cleaner to have no attributes for a specific description
-        mock_cleaner.cleaned = {
-            "roof-description": []
-        }
-        property_instance.search_address_epc()
        property_instance.data["roof-description"] = "Pitched, no insulation"
        property_instance.walls = {
            "original_description": "Walls Description",
@ -324,14 +359,17 @@ class TestProperty:
        }

        # Assert backup cleaning has been applied
-        property_instance.get_components(mock_cleaner.cleaned)
+        property_instance.get_components(
+            mock_cleaner.cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+        )

        assert property_instance.roof["clean_description"] == "Pitched, no insulation"
        assert property_instance.roof["is_pitched"]

-    def test_get_components_multiple_attributes(self, property_instance, mock_cleaner):
+    def test_get_components_multiple_attributes(
+        self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+    ):
        # This shouldn't happen - it would mean a cleaning error
-        property_instance.search_address_epc()
        property_instance.data["roof-description"] = "Roof Description"
        cleaned = {
            "roof-description": [
@ -342,4 +380,102 @@ class TestProperty:

        # Verify that ValueError is raised when multiple attributes are found
        with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"):
-            property_instance.get_components(cleaned)
+            property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds)
+
+    def test_set_spatial(self):
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = mock_epc_response["rows"][0]
+        prop = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
+
+        spatial1 = pd.DataFrame([{
+            'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
+            'conservation_status': True, 'is_listed_building': False, 'is_heritage_building': True
+        }])
+
+        prop.set_spatial(spatial1)
+
+        assert prop.in_conservation_area
+        assert not prop.is_listed
+        assert prop.is_heritage
+        assert prop.restricted_measures
+
+        prop2 = Property(1, "AB12CD", "Test Address", epc_record=epc_record)
+
+        spatial2 = pd.DataFrame([{
+            'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
+            'conservation_status': None, 'is_listed_building': False, 'is_heritage_building': False
+        }])
+
+        prop2.set_spatial(spatial2)
+
+        assert prop2.in_conservation_area is None
+        assert not prop2.is_listed
+        assert not prop2.is_heritage
+        assert not prop2.restricted_measures
+
+    def test_set_floor_level(self):
+        # In this case, we have a flat which looks like it's on the first floor, but it's actually on the ground
+        # floor, so we should set floor_level to 0
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = {'floor-level': '01', 'property-type': 'Flat'}
+        prop = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
+        prop.floor = {
+            'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation',
+            'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': True,
+            'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': True,
+            'another_property_below': False, 'insulation_thickness': 'none', 'floor_thermal_transmittance': None,
+            'floor_insulation_thickness': 'none'
+        }
+
+        prop.set_floor_level()
+
+        assert prop.floor_level == 0
+
+        # This property is labelled as being on the ground floor but actually has another property below
+        # so we set floor level to 1
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = {'floor-level': 'Ground', 'property-type': 'Flat'}
+        prop2 = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
+        prop2.floor = {
+            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
+            'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
+            'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False,
+            'another_property_below': True, 'insulation_thickness': 'none', 'floor_thermal_transmittance': None,
+            'floor_insulation_thickness': 'none'
+        }
+
+        prop2.set_floor_level()
+
+        assert prop2.floor_level == 1
+
+        # this property is correctly labelled as being on the 2nd floor
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = {'floor-level': '02', 'property-type': 'Flat'}
+        prop3 = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
+        prop3.floor = {
+            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
+            'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
+            'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False,
+            'another_property_below': True, 'insulation_thickness': 'none', 'floor_thermal_transmittance': None,
+            'floor_insulation_thickness': 'none'
+        }
+
+        prop3.set_floor_level()
+
+        assert prop3.floor_level == 2
+
+        # Example of a house
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = {'floor-level': '', 'property-type': 'House'}
+        prop4 = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
+        prop4.floor = {
+            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
+            'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
+            'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False,
+            'another_property_below': False, 'insulation_thickness': 'none', 'floor_thermal_transmittance': None,
+            'floor_insulation_thickness': 'none'
+        }
+
+        prop4.set_floor_level()
+
+        assert prop4.floor_level is None
--- a/backend/tests/test_sap_model_prep.py
+++ b/backend/tests/test_sap_model_prep.py
@ -1,989 +0,0 @@
-from backend.Property import Property
-from etl.epc.DataProcessor import DataProcessor
-from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
-from etl.epc.settings import COLUMNS_TO_MERGE_ON
-from epc_api.client import EpcClient
-import pandas as pd
-import pytest
-import msgpack
-
-from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
-from tqdm import tqdm
-
-
-# Handy code for selecting testing data
-# import pickle
-#
-# with open("sap_dataset.pickle", "rb") as f:
-#     sap_change_dataset = pickle.load(f)
-#
-# search_from = sap_change_dataset[
-#     (sap_change_dataset["walls_thermal_transmittance_ENDING"] == sap_change_dataset["walls_thermal_transmittance"]) &
-#     sap_change_dataset["is_to_unheated_space"]
-#     ]
-# search_from = search_from[
-#     (search_from["roof_thermal_transmittance_ENDING"] == search_from["roof_thermal_transmittance"]) &
-#     (search_from["floor_thermal_transmittance_ENDING"] != search_from["floor_thermal_transmittance"]) &
-#     (search_from["MECHANICAL_VENTILATION_ENDING"] == search_from["MECHANICAL_VENTILATION_STARTING"]) &
-#     (search_from["SECONDHEAT_DESCRIPTION_ENDING"] == search_from["SECONDHEAT_DESCRIPTION_STARTING"]) &
-#     (search_from["GLAZED_TYPE_ENDING"] == search_from["GLAZED_TYPE_STARTING"])
-#     ]
-#
-# # Find a record where the only difference is cavity wall getting filled
-# ending_cols = [c for c in search_from.columns if "_ENDING" in c]
-#
-# ignore = [
-#     "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING", "TRANSACTION_TYPE_ENDING", "FLOOR_HEIGHT_ENDING",
-#     "DAYS_TO_ENDING", "TOTAL_FLOOR_AREA_ENDING"
-# ]
-#
-# ending_cols = [c for c in ending_cols if c not in ignore]
-#
-# for _, row in tqdm(search_from.iterrows(), total=search_from.shape[0]):
-#
-#     same = True
-#     starting_cols = []
-#     for c in ending_cols:
-#
-#         starting_col = c.replace("_ENDING", "")
-#         if starting_col not in search_from.columns:
-#             starting_col = c.replace("_ENDING", "_STARTING")
-#             if starting_col not in search_from.columns:
-#                 raise Exception("something went wrong")
-#
-#         starting_cols.append(starting_col)
-#
-#         # We want them to be different
-#         if c == "floor_thermal_transmittance_ENDING":
-#             if (row[c] == row[starting_col]) | (row[starting_col] != "natural"):
-#                 same = False
-#                 break
-#             else:
-#                 continue
-#
-#         # We now check if the starting and ending values are the same
-#         if row[c] != row[starting_col]:
-#             same = False
-#             break
-#
-#     if same:
-#         raise Exception("We found one!")
-#
-#     fixed_cols = [c for c in search_from.columns if c not in starting_cols + ending_cols]
-#
-#     import pandas as pd
-#
-#     start = row[["SAP_STARTING"] + starting_cols]
-#     start.index = [c.replace("_STARTING", "") for c in start.index]
-#     end = row[["SAP_ENDING"] + ending_cols]
-#     end.index = [c.replace("_ENDING", "") for c in end.index]
-#     start["type"] = "starting"
-#     end["type"] = "ending"
-#
-#     compare = pd.concat([start, end], axis=1)
-#
-# ending_lmk = "1252008839062019090910572351658131"
-# starting_lmk = "1252008819542014122308482236142128"
-#
-# client = EpcClient(auth_token=EPC_AUTH_TOKEN)
-# result = client.domestic.search(params={"address": "Flat 14 Charles House, Freemens Way", "postcode": "CT14 9DL"})
-# starting_epc = [x for x in result["rows"] if x["lmk-key"] == starting_lmk][0]
-# ending_epc = [x for x in result["rows"] if x["lmk-key"] == ending_lmk][0]
-
-
-# with open(
-#     os.path.abspath(os.path.dirname(__file__)) + "/backend/tests/test_data/cleaned.pickle", "rb"
-# ) as f:
-#     cleaned = pickle.load(f)
-
-# with open(
-#     os.path.abspath(os.path.dirname(__file__)) + "/backend/tests/test_data/cleaning_data.pickle", "rb"
-# ) as f:
-#     cleaning_data = pickle.load(f)
-
-# TODO: Need to do floors, suspended and solid and to unheated space
-
-
-class TestSapModelPrep:
-
-    @pytest.fixture
-    def cleaning_data(self):
-        return read_dataframe_from_s3_parquet(
-            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
-        )
-
-    @pytest.fixture
-    def cleaned(self):
-        cleaned = read_from_s3(
-            s3_file_name="cleaned_epc_data/cleaned.bson",
-            bucket_name="retrofit-data-dev"
-        )
-
-        cleaned = msgpack.unpackb(cleaned, raw=False)
-        return cleaned
-
-    def test_fill_cavity_wall(self, cleaned, cleaning_data):
-        """
-        We ensure that the process that prepares the data in the engine code results in the same data as
-        the model is trained on
-        """
-
-        # This is an actual starting EPC
-        starting_epc = {
-            'low-energy-fixed-light-count': '', 'address': '26, Vicarage Lane, Eaton',
-            'uprn-source': 'Address Matched', 'floor-height': '2.39', 'heating-cost-potential': '942',
-            'unheated-corridor-length': '', 'hot-water-cost-potential': '97',
-            'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'D',
-            'mainheat-energy-eff': 'Average', 'windows-env-eff': 'Good', 'lighting-energy-eff': 'Average',
-            'environment-impact-potential': '53',
-            'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '1475',
-            'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
-            'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Melton',
-            'fixed-lighting-outlets-count': '', 'energy-tariff': 'Single',
-            'mechanical-ventilation': 'natural', 'hot-water-cost-current': '96', 'county': 'Leicestershire',
-            'postcode': 'NG32 1SP', 'solar-water-heating-flag': 'Y', 'constituency': 'E14000909',
-            'co2-emissions-potential': '5.7', 'number-heated-rooms': '7',
-            'floor-description': 'Suspended, no insulation (assumed)',
-            'energy-consumption-potential': '177', 'local-authority': 'E07000133', 'built-form': 'Detached',
-            'number-open-fireplaces': '1', 'windows-description': 'Fully double glazed',
-            'glazed-area': 'Normal', 'inspection-date': '2016-09-22', 'mains-gas-flag': 'N',
-            'co2-emiss-curr-per-floor-area': '87', 'address1': '26, Vicarage Lane',
-            'heat-loss-corridor': 'NO DATA!', 'flat-storey-count': '',
-            'constituency-label': 'Rutland and Melton', 'roof-energy-eff': 'Very Poor',
-            'total-floor-area': '116.0', 'building-reference-number': '4940047478',
-            'environment-impact-current': '29', 'co2-emissions-current': '10.0',
-            'roof-description': 'Pitched, limited insulation (assumed)', 'floor-energy-eff': 'NO DATA!',
-            'number-habitable-rooms': '7', 'address2': 'Eaton', 'hot-water-env-eff': 'Good',
-            'posttown': 'GRANTHAM', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'oil (not community)',
-            'lighting-env-eff': 'Average', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
-            'sheating-env-eff': 'N/A',
-            'lighting-description': 'Low energy lighting in 31% of fixed outlets',
-            'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Poor', 'photo-supply': '',
-            'lighting-cost-potential': '69', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
-            'main-heating-controls': '2106', 'lodgement-datetime': '2016-09-23 20:29:01',
-            'flat-top-storey': '', 'current-energy-rating': 'F',
-            'secondheat-description': 'Room heaters, dual fuel (mineral and wood)', 'walls-env-eff': 'Poor',
-            'transaction-type': 'marketed sale', 'uprn': '100030534042', 'current-energy-efficiency': '34',
-            'energy-consumption-current': '343', 'mainheat-description': 'Boiler and radiators, oil',
-            'lighting-cost-current': '117', 'lodgement-date': '2016-09-23', 'extension-count': '2',
-            'mainheatc-env-eff': 'Good', 'lmk-key': '1481856849902016092320290148762028',
-            'wind-turbine-count': '0', 'tenure': 'owner-occupied', 'floor-level': 'NODATA!',
-            'potential-energy-efficiency': '64', 'hot-water-energy-eff': 'Good',
-            'low-energy-lighting': '31',
-            'walls-description': 'Cavity wall, as built, no insulation (assumed)',
-            'hotwater-description': 'From main system, plus solar'
-        }
-
-        # This is the training data as we prepare it in the engine
-        # This is an actual record from the training data
-        row = {
-            'UPRN': '100030534042', 'RDSAP_CHANGE': 12, 'HEAT_DEMAND_CHANGE': -72,
-            'CARBON_CHANGE': -2.0999999999999996, 'SAP_STARTING': 34, 'SAP_ENDING': 46, 'HEAT_DEMAND_STARTING': 343,
-            'HEAT_DEMAND_ENDING': 271, 'CARBON_STARTING': 10.0, 'CARBON_ENDING': 7.9, 'PROPERTY_TYPE': 'House',
-            'BUILT_FORM': 'Detached', 'CONSTITUENCY': 'E14000909', 'NUMBER_HABITABLE_ROOMS': 7.0,
-            'NUMBER_HEATED_ROOMS': 7.0, 'FIXED_LIGHTING_OUTLETS_COUNT': 21.0,
-            'CONSTRUCTION_AGE_BAND': 'England and Wales: 1967-1975', 'TRANSACTION_TYPE_STARTING': 'marketed sale',
-            'MECHANICAL_VENTILATION_STARTING': 'natural',
-            'SECONDHEAT_DESCRIPTION_STARTING': 'Room heaters, dual fuel (mineral and wood)',
-            'ENERGY_TARIFF_STARTING': 'Single', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'Y',
-            'PHOTO_SUPPLY_STARTING': 0.0, 'GLAZED_TYPE_STARTING': 'double glazing installed during or after 2002',
-            'MULTI_GLAZE_PROPORTION_STARTING': 100.0, 'LOW_ENERGY_LIGHTING_STARTING': 31.0,
-            'NUMBER_OPEN_FIREPLACES_STARTING': 1.0, 'EXTENSION_COUNT_STARTING': 2.0,
-            'TOTAL_FLOOR_AREA_STARTING': 116.0, 'FLOOR_HEIGHT_STARTING': 2.39,
-            'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'natural',
-            'SECONDHEAT_DESCRIPTION_ENDING': 'Room heaters, dual fuel (mineral and wood)',
-            'ENERGY_TARIFF_ENDING': 'Single', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'Y', 'PHOTO_SUPPLY_ENDING': 0.0,
-            'GLAZED_TYPE_ENDING': 'double glazing installed during or after 2002',
-            'MULTI_GLAZE_PROPORTION_ENDING': 100.0, 'LOW_ENERGY_LIGHTING_ENDING': 31.0,
-            'NUMBER_OPEN_FIREPLACES_ENDING': 1.0, 'EXTENSION_COUNT_ENDING': 2.0, 'TOTAL_FLOOR_AREA_ENDING': 116.0,
-            'FLOOR_HEIGHT_ENDING': 2.41, 'DAYS_TO_STARTING': 784, 'DAYS_TO_ENDING': 867,
-            'walls_thermal_transmittance': 1.5, 'is_cavity_wall': True, 'is_filled_cavity': False,
-            'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False,
-            'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False,
-            'is_sandstone_or_limestone': False, 'is_park_home': False, 'walls_insulation_thickness': 'none',
-            'external_insulation': False, 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 0.7,
-            'is_park_home_ENDING': False, 'walls_insulation_thickness_ENDING': 'average',
-            'external_insulation_ENDING': False, 'internal_insulation_ENDING': False,
-            'floor_thermal_transmittance': 0.64, 'is_to_unheated_space': False, 'is_to_external_air': False,
-            'is_suspended': True, 'is_solid': False, 'another_property_below': False,
-            'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.64,
-            'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 1.5, 'is_pitched': True,
-            'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
-            'has_dwelling_above': False, 'roof_insulation_thickness': 'below average',
-            'roof_thermal_transmittance_ENDING': 1.5, 'roof_insulation_thickness_ENDING': 'below average',
-            'heater_type': 'Unknown', 'system_type': 'from main system', 'thermostat_characteristics': 'Unknown',
-            'heating_scope': 'Unknown', 'energy_recovery': 'Unknown', 'hotwater_tariff_type': 'Unknown',
-            'extra_features': 'plus solar', 'chp_systems': 'Unknown', 'distribution_system': 'Unknown',
-            'no_system_present': 'Unknown', 'appliance': 'Unknown', 'heater_type_ENDING': 'Unknown',
-            'system_type_ENDING': 'from main system', 'thermostat_characteristics_ENDING': 'Unknown',
-            'heating_scope_ENDING': 'Unknown', 'energy_recovery_ENDING': 'Unknown',
-            'hotwater_tariff_type_ENDING': 'Unknown', 'extra_features_ENDING': 'plus solar',
-            'chp_systems_ENDING': 'Unknown', 'distribution_system_ENDING': 'Unknown',
-            'no_system_present_ENDING': 'Unknown', 'appliance_ENDING': 'Unknown', 'has_radiators': True,
-            'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
-            'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True,
-            'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
-            'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
-            'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
-            'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
-            'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
-            'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
-            'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': True,
-            'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
-            'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
-            'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
-            'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
-            'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
-            'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
-            'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
-            'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
-            'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
-            'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
-            'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
-            'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
-            'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
-            'has_electric_ENDING': False, 'has_mains_gas_ENDING': False, 'has_wood_logs_ENDING': False,
-            'has_coal_ENDING': False, 'has_oil_ENDING': True, 'has_wood_pellets_ENDING': False,
-            'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
-            'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
-            'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
-            'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'room thermostat',
-            'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
-            'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
-            'auxiliary_systems': 'Unknown', 'trvs': 'trvs', 'rate_control': 'Unknown',
-            'thermostatic_control_ENDING': 'room thermostat', 'charging_system_ENDING': 'Unknown',
-            'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
-            'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
-            'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'trvs', 'rate_control_ENDING': 'Unknown',
-            'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'oil',
-            'main-fuel_tariff_type': 'Unknown', 'is_community': False,
-            'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
-            'fuel_type_ENDING': 'oil', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
-            'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
-            'estimated_perimeter_STARTING': 44.77882152472145, 'estimated_perimeter_ENDING': 44.77882152472145,
-            'HOT_WATER_ENERGY_EFF_STARTING': "Good",
-            "FLOOR_ENERGY_EFF_STARTING": "Unknown",
-            "WINDOWS_ENERGY_EFF_STARTING": "Good",
-            "WALLS_ENERGY_EFF_STARTING": "Poor",
-            "SHEATING_ENERGY_EFF_STARTING": "Unknown",
-            "ROOF_ENERGY_EFF_STARTING": "Very Poor",
-            "MAINHEAT_ENERGY_EFF_STARTING": "Average",
-            "MAINHEATC_ENERGY_EFF_STARTING": "Good",
-            "LIGHTING_ENERGY_EFF_STARTING": "Average",
-            "POTENTIAL_ENERGY_EFFICIENCY": 64,
-            "ENVIRONMENT_IMPACT_POTENTIAL": 53,
-            "ENERGY_CONSUMPTION_POTENTIAL": 177.0,
-            "CO2_EMISSIONS_POTENTIAL": 5.7,
-            "HOT_WATER_ENERGY_EFF_ENDING": "Good",
-            "FLOOR_ENERGY_EFF_ENDING": "Unknown",
-            "WINDOWS_ENERGY_EFF_ENDING": "Good",
-            "WALLS_ENERGY_EFF_ENDING": "Good",
-            "SHEATING_ENERGY_EFF_ENDING": "Unknown",
-            "ROOF_ENERGY_EFF_ENDING": "Very Poor",
-            "MAINHEAT_ENERGY_EFF_ENDING": "Average",
-            "MAINHEATC_ENERGY_EFF_ENDING": "Good",
-            "LIGHTING_ENERGY_EFF_ENDING": "Average",
-        }
-
-        home = Property(
-            id=0,
-            postcode=starting_epc["postcode"],
-            address1=starting_epc["address1"],
-            epc_client=EpcClient(auth_token="notoken"),
-            data=starting_epc
-        )
-        home.get_components(cleaned)
-
-        data_processor = DataProcessor(None, newdata=True)
-        data_processor.insert_data(pd.DataFrame([home.get_model_data()]))
-
-        data_processor.pre_process()
-
-        starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
-        ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
-        fixed_data = data_processor.get_fixed_features()
-
-        ending_lodgement_date = '2016-12-15'
-
-        ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(ending_lodgement_date)
-
-        recommendation = {
-            "recommendation_id": 0,
-            "new_u_value": 0.7,
-            "type": "wall_insulation"
-        }
-
-        test_record = create_recommendation_scoring_data(
-            property=home,
-            recommendation=recommendation,
-            starting_epc_data=starting_epc_data,
-            ending_epc_data=ending_epc_data,
-            fixed_data=fixed_data,
-        )
-        test_record = pd.DataFrame([test_record])
-
-        # Test the final cleaning:
-        test_record = DataProcessor.apply_averages_cleaning(
-            data_to_clean=test_record,
-            cleaning_data=cleaning_data,
-            cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
-        ).drop(columns=["LOCAL_AUTHORITY"])
-
-        test_record = DataProcessor.clean_missings_after_description_process(
-            test_record, [
-                c for c in test_record.columns if
-                ("thermal_transmittance" in c) or ("insulation_thickness" in c)
-            ]
-        )
-
-        # Test that the data has been set up correctly
-
-        # Things to fix:
-        # [] Filled cavity should have an average insulation thickness in the cleaned data
-
-        for c in test_record.columns:
-            if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
-                continue
-
-            if c == "FLOOR_HEIGHT_ENDING":
-                assert (row[c] - test_record[c].values[0]) <= 0.020001
-                continue
-
-            if c == "walls_insulation_thickness_ENDING":
-                assert row[c] == "average"
-                assert test_record[c].values[0] == "above average"
-                continue
-
-            assert test_record[c].values[0] == row[c]
-
-    def test_solid_wall_insulation(self, cleaned, cleaning_data):
-
-        starting_epc2 = {
-            'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY',
-            'uprn-source': 'Energy Assessor', 'floor-height': '3.64', 'heating-cost-potential': '465',
-            'unheated-corridor-length': '', 'hot-water-cost-potential': '185',
-            'construction-age-band': 'England and Wales: 1900-1929', 'potential-energy-rating': 'C',
-            'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Poor',
-            'environment-impact-potential': '51', 'glazed-type': 'double glazing installed during or after 2002',
-            'heating-cost-current': '1223', 'address3': '3 WESTERN GATEWAY',
-            'mainheatcont-description': 'Programmer and appliance thermostats', 'sheating-energy-eff': 'N/A',
-            'property-type': 'Flat', 'local-authority-label': 'Newham', 'fixed-lighting-outlets-count': '12',
-            'energy-tariff': 'off-peak 7 hour', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '342',
-            'county': '', 'postcode': 'E16 1BD', 'solar-water-heating-flag': 'N', 'constituency': 'E14001032',
-            'co2-emissions-potential': '3.6', 'number-heated-rooms': '2', 'floor-description': '(other premises below)',
-            'energy-consumption-potential': '307', 'local-authority': 'E09000025', 'built-form': 'Mid-Terrace',
-            'number-open-fireplaces': '0', 'windows-description': 'Partial double glazing', 'glazed-area': 'Normal',
-            'inspection-date': '2020-10-14', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '66',
-            'address1': 'FLAT 12', 'heat-loss-corridor': 'heated corridor', 'flat-storey-count': '',
-            'constituency-label': 'West Ham', 'roof-energy-eff': 'N/A', 'total-floor-area': '70.0',
-            'building-reference-number': '10000539740', 'environment-impact-current': '42',
-            'co2-emissions-current': '4.6', 'roof-description': '(another dwelling above)', 'floor-energy-eff': 'N/A',
-            'number-habitable-rooms': '2', 'address2': 'WAREHOUSE W', 'hot-water-env-eff': 'Poor', 'posttown': 'LONDON',
-            'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Poor',
-            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
-            'lighting-description': 'Low energy lighting in 17% of fixed outlets', 'roof-env-eff': 'N/A',
-            'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '67',
-            'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '61', 'main-heating-controls': '',
-            'lodgement-datetime': '2020-10-14 00:00:00', 'flat-top-storey': 'N', 'current-energy-rating': 'F',
-            'secondheat-description': 'None', 'walls-env-eff': 'Very Poor', 'transaction-type': 'marketed sale',
-            'uprn': '10012839482', 'current-energy-efficiency': '33', 'energy-consumption-current': '393',
-            'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '110',
-            'lodgement-date': '2020-10-14', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
-            'lmk-key': 'b0d82f468273bec55ec5676a809b8e36b55db940ffa92f482a482f6aaa38eb1d', 'wind-turbine-count': '0',
-            'tenure': 'Owner-occupied', 'floor-level': '01', 'potential-energy-efficiency': '71',
-            'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '17',
-            'walls-description': 'Solid brick, as built, no insulation (assumed)',
-            'hotwater-description': 'Electric immersion, standard tariff'
-        }
-
-        row2 = {
-            'UPRN': '10012839482', 'RDSAP_CHANGE': 8, 'HEAT_DEMAND_CHANGE': -59,
-            'CARBON_CHANGE': -0.5999999999999996, 'SAP_STARTING': 33, 'SAP_ENDING': 41, 'HEAT_DEMAND_STARTING': 393,
-            'HEAT_DEMAND_ENDING': 334, 'CARBON_STARTING': 4.6, 'CARBON_ENDING': 4.0, 'PROPERTY_TYPE': 'Flat',
-            'BUILT_FORM': 'Mid-Terrace', 'CONSTITUENCY': 'E14001032', 'NUMBER_HABITABLE_ROOMS': 2.0,
-            'NUMBER_HEATED_ROOMS': 2.0, 'FIXED_LIGHTING_OUTLETS_COUNT': 12.0,
-            'CONSTRUCTION_AGE_BAND': 'England and Wales: 1996-2002', 'TRANSACTION_TYPE_STARTING': 'marketed sale',
-            'MECHANICAL_VENTILATION_STARTING': 'natural', 'SECONDHEAT_DESCRIPTION_STARTING': 'None',
-            'ENERGY_TARIFF_STARTING': 'off-peak 7 hour', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'N',
-            'PHOTO_SUPPLY_STARTING': 0.0, 'GLAZED_TYPE_STARTING': 'double glazing installed during or after 2002',
-            'MULTI_GLAZE_PROPORTION_STARTING': 61.0, 'LOW_ENERGY_LIGHTING_STARTING': 17.0,
-            'NUMBER_OPEN_FIREPLACES_STARTING': 0.0, 'EXTENSION_COUNT_STARTING': 0.0,
-            'TOTAL_FLOOR_AREA_STARTING': 70.0, 'FLOOR_HEIGHT_STARTING': 3.64,
-            'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'natural',
-            'SECONDHEAT_DESCRIPTION_ENDING': 'None', 'ENERGY_TARIFF_ENDING': 'off-peak 7 hour',
-            'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 0.0,
-            'GLAZED_TYPE_ENDING': 'double glazing installed during or after 2002',
-            'MULTI_GLAZE_PROPORTION_ENDING': 61.0, 'LOW_ENERGY_LIGHTING_ENDING': 17.0,
-            'NUMBER_OPEN_FIREPLACES_ENDING': 0.0, 'EXTENSION_COUNT_ENDING': 0.0, 'TOTAL_FLOOR_AREA_ENDING': 70.0,
-            'FLOOR_HEIGHT_ENDING': 3.64, 'DAYS_TO_STARTING': 2266, 'DAYS_TO_ENDING': 2307,
-            'walls_thermal_transmittance': 1.7, 'is_cavity_wall': False, 'is_filled_cavity': False,
-            'is_solid_brick': True, 'is_system_built': False, 'is_timber_frame': False,
-            'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False,
-            'is_sandstone_or_limestone': False, 'is_park_home': False, 'walls_insulation_thickness': 'none',
-            'external_insulation': False, 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 0.21,
-            'is_park_home_ENDING': False, 'walls_insulation_thickness_ENDING': 'average',
-            'external_insulation_ENDING': False, 'internal_insulation_ENDING': False,
-            'floor_thermal_transmittance': 0.0, 'is_to_unheated_space': False, 'is_to_external_air': False,
-            'is_suspended': False, 'is_solid': False, 'another_property_below': True,
-            'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.0,
-            'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 0.0, 'is_pitched': False,
-            'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
-            'has_dwelling_above': True, 'roof_insulation_thickness': 'none',
-            'roof_thermal_transmittance_ENDING': 0.0, 'roof_insulation_thickness_ENDING': 'none',
-            'heater_type': 'electric immersion', 'system_type': 'Unknown', 'thermostat_characteristics': 'Unknown',
-            'heating_scope': 'Unknown', 'energy_recovery': 'Unknown', 'hotwater_tariff_type': 'standard tariff',
-            'extra_features': 'Unknown', 'chp_systems': 'Unknown', 'distribution_system': 'Unknown',
-            'no_system_present': 'Unknown', 'appliance': 'Unknown', 'heater_type_ENDING': 'electric immersion',
-            'system_type_ENDING': 'Unknown', 'thermostat_characteristics_ENDING': 'Unknown',
-            'heating_scope_ENDING': 'Unknown', 'energy_recovery_ENDING': 'Unknown',
-            'hotwater_tariff_type_ENDING': 'standard tariff', 'extra_features_ENDING': 'Unknown',
-            'chp_systems_ENDING': 'Unknown', 'distribution_system_ENDING': 'Unknown',
-            'no_system_present_ENDING': 'Unknown', 'appliance_ENDING': 'Unknown', 'has_radiators': False,
-            'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
-            'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
-            'has_air_source_heat_pump': False, 'has_room_heaters': True, 'has_electric_storage_heaters': False,
-            'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
-            'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
-            'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
-            'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
-            'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': True,
-            'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
-            'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
-            'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
-            'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': False,
-            'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
-            'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
-            'has_boiler_ENDING': False, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': True,
-            'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
-            'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
-            'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
-            'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
-            'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
-            'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
-            'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
-            'has_electric_ENDING': True, 'has_mains_gas_ENDING': False, 'has_wood_logs_ENDING': False,
-            'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
-            'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
-            'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
-            'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
-            'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'appliance thermostats',
-            'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
-            'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
-            'auxiliary_systems': 'Unknown', 'trvs': 'Unknown', 'rate_control': 'Unknown',
-            'thermostatic_control_ENDING': 'appliance thermostats', 'charging_system_ENDING': 'Unknown',
-            'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
-            'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
-            'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'Unknown', 'rate_control_ENDING': 'Unknown',
-            'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'electricity',
-            'main-fuel_tariff_type': 'Unknown', 'is_community': False,
-            'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
-            'fuel_type_ENDING': 'electricity', 'main-fuel_tariff_type_ENDING': 'Unknown',
-            'is_community_ENDING': False, 'no_individual_heating_or_community_network_ENDING': False,
-            'complex_fuel_type_ENDING': 'Unknown', 'estimated_perimeter_STARTING': 35.4964786985977,
-            'estimated_perimeter_ENDING': 35.4964786985977,
-            'HOT_WATER_ENERGY_EFF_STARTING': "Very Poor",
-            "FLOOR_ENERGY_EFF_STARTING": "Unknown",
-            "WINDOWS_ENERGY_EFF_STARTING": "Average",
-            "WALLS_ENERGY_EFF_STARTING": "Very Poor",
-            "SHEATING_ENERGY_EFF_STARTING": "Unknown",
-            "ROOF_ENERGY_EFF_STARTING": "Unknown",
-            "MAINHEAT_ENERGY_EFF_STARTING": "Very Poor",
-            "MAINHEATC_ENERGY_EFF_STARTING": "Good",
-            "LIGHTING_ENERGY_EFF_STARTING": "Poor",
-            "POTENTIAL_ENERGY_EFFICIENCY": 71,
-            "ENVIRONMENT_IMPACT_POTENTIAL": 51,
-            "ENERGY_CONSUMPTION_POTENTIAL": 307,
-            "CO2_EMISSIONS_POTENTIAL": 3.6,
-            'HOT_WATER_ENERGY_EFF_ENDING': "Very Poor",
-            "FLOOR_ENERGY_EFF_ENDING": "Unknown",
-            "WINDOWS_ENERGY_EFF_ENDING": "Average",
-            "WALLS_ENERGY_EFF_ENDING": "Good",
-            "SHEATING_ENERGY_EFF_ENDING": "Unknown",
-            "ROOF_ENERGY_EFF_ENDING": "Unknown",
-            "MAINHEAT_ENERGY_EFF_ENDING": "Very Poor",
-            "MAINHEATC_ENERGY_EFF_ENDING": "Good",
-            "LIGHTING_ENERGY_EFF_ENDING": "Poor",
-        }
-
-        home2 = Property(
-            id=0,
-            postcode=starting_epc2["postcode"],
-            address1=starting_epc2["address1"],
-            epc_client=EpcClient(auth_token="notoken"),
-            data=starting_epc2
-        )
-        home2.get_components(cleaned)
-
-        data_processor2 = DataProcessor(None, newdata=True)
-        data_processor2.insert_data(pd.DataFrame([home2.get_model_data()]))
-
-        data_processor2.pre_process()
-
-        starting_epc_data2 = data_processor2.get_component_features(suffix="_STARTING")
-        ending_epc_data2 = data_processor2.get_component_features(suffix="_ENDING")
-        fixed_data2 = data_processor2.get_fixed_features()
-
-        ending_lodgement_date2 = '2020-11-24'
-
-        ending_epc_data2["DAYS_TO_ENDING"] = data_processor2.calculate_days_to(ending_lodgement_date2)
-
-        recommendation2 = {
-            "recommendation_id": 0,
-            "new_u_value": 0.21,
-            "type": "wall_insulation"
-        }
-
-        test_record2 = create_recommendation_scoring_data(
-            property=home2,
-            recommendation=recommendation2,
-            starting_epc_data=starting_epc_data2,
-            ending_epc_data=ending_epc_data2,
-            fixed_data=fixed_data2,
-        )
-        test_record2 = pd.DataFrame([test_record2])
-
-        # Test the final cleaning:
-        test_record2 = DataProcessor.apply_averages_cleaning(
-            data_to_clean=test_record2,
-            cleaning_data=cleaning_data,
-            cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
-        ).drop(columns=["LOCAL_AUTHORITY"])
-
-        test_record2 = DataProcessor.clean_missings_after_description_process(
-            test_record2, [
-                c for c in test_record2.columns if
-                ("thermal_transmittance" in c) or ("insulation_thickness" in c)
-            ]
-        )
-
-        for c in test_record2.columns:
-            if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
-                continue
-
-            if c == "FLOOR_HEIGHT_ENDING":
-                assert (row2[c] - test_record2[c].values[0]) <= 0.020001
-                continue
-
-            if c == "walls_insulation_thickness_ENDING":
-                assert row2[c] == "average"
-                assert test_record2[c].values[0] == "above average"
-                continue
-
-            if c == "CONSTRUCTION_AGE_BAND":
-                # For this, we have different values in the original data
-                assert row2[c] == "England and Wales: 1996-2002"
-                assert test_record2[c].values[0] == "England and Wales: 1900-1929"
-                continue
-
-            assert test_record2[c].values[0] == row2[c]
-
-    def test_ventilation(self, cleaned, cleaning_data):
-
-        starting_epc3 = {
-            'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor',
-            'floor-height': '1.87', 'heating-cost-potential': '645', 'unheated-corridor-length': '',
-            'hot-water-cost-potential': '69', 'construction-age-band': 'England and Wales: 1900-1929',
-            'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
-            'lighting-energy-eff': 'Average', 'environment-impact-potential': '75',
-            'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '1028', 'address3': '',
-            'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A',
-            'property-type': 'House', 'local-authority-label': 'Sheffield', 'fixed-lighting-outlets-count': '21',
-            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '96',
-            'county': '', 'postcode': 'S6 4FG', 'solar-water-heating-flag': 'N', 'constituency': 'E14000921',
-            'co2-emissions-potential': '2.9', 'number-heated-rooms': '5',
-            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '152',
-            'local-authority': 'E08000019', 'built-form': 'Enclosed Mid-Terrace', 'number-open-fireplaces': '0',
-            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-06-13',
-            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '59', 'address1': '45 Shepperson Road',
-            'heat-loss-corridor': '', 'flat-storey-count': '',
-            'constituency-label': 'Sheffield, Brightside and Hillsborough', 'roof-energy-eff': 'Very Poor',
-            'total-floor-area': '107.0', 'building-reference-number': '10002892085', 'environment-impact-current': '46',
-            'co2-emissions-current': '6.3', 'roof-description': 'Pitched, no insulation (assumed)',
-            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good',
-            'posttown': 'SHEFFIELD', 'mainheatc-energy-eff': 'Average', 'main-fuel': 'mains gas (not community)',
-            'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
-            'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 43% of fixed outlets',
-            'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0',
-            'lighting-cost-potential': '83', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
-            'main-heating-controls': '', 'lodgement-datetime': '2023-05-27 12:15:21', 'flat-top-storey': '',
-            'current-energy-rating': 'E', 'secondheat-description': 'None', 'walls-env-eff': 'Very Poor',
-            'transaction-type': 'marketed sale', 'uprn': '100051073214', 'current-energy-efficiency': '54',
-            'energy-consumption-current': '335', 'mainheat-description': 'Boiler and radiators, mains gas',
-            'lighting-cost-current': '131', 'lodgement-date': '2023-05-27', 'extension-count': '1',
-            'mainheatc-env-eff': 'Average',
-            'lmk-key': 'dc1a4da246562656132b8e36e0534cd90b09fa40fc584e25e644e2d9ab86a247', 'wind-turbine-count': '0',
-            'tenure': 'Not defined - use in the case of a new dwelling for which the intended tenure in not known. It '
-                      'is not to be used for an existing dwelling',
-            'floor-level': '', 'potential-energy-efficiency': '80', 'hot-water-energy-eff': 'Good',
-            'low-energy-lighting': '43',
-            'walls-description': 'Sandstone or limestone, as built, no insulation (assumed)',
-            'hotwater-description': 'From main system'
-        }
-
-        row3 = {
-            'UPRN': '100051073214', 'RDSAP_CHANGE': 2, 'HEAT_DEMAND_CHANGE': -22, 'CARBON_CHANGE': -0.39999999999999947,
-            'SAP_STARTING': 54, 'SAP_ENDING': 56, 'HEAT_DEMAND_STARTING': 335, 'HEAT_DEMAND_ENDING': 313,
-            'CARBON_STARTING': 6.3, 'CARBON_ENDING': 5.9, 'PROPERTY_TYPE': 'House', 'BUILT_FORM': 'Mid-Terrace',
-            'CONSTITUENCY': 'E14000921', 'NUMBER_HABITABLE_ROOMS': 5.0, 'NUMBER_HEATED_ROOMS': 5.0,
-            'FIXED_LIGHTING_OUTLETS_COUNT': 21.0, 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1900-1929',
-            'TRANSACTION_TYPE_STARTING': 'marketed sale', 'MECHANICAL_VENTILATION_STARTING': 'natural',
-            'SECONDHEAT_DESCRIPTION_STARTING': 'None', 'ENERGY_TARIFF_STARTING': 'Single',
-            'SOLAR_WATER_HEATING_FLAG_STARTING': 'N', 'PHOTO_SUPPLY_STARTING': 0.0,
-            'GLAZED_TYPE_STARTING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_STARTING': 100.0,
-            'LOW_ENERGY_LIGHTING_STARTING': 43.0, 'NUMBER_OPEN_FIREPLACES_STARTING': 0.0,
-            'EXTENSION_COUNT_STARTING': 1.0, 'TOTAL_FLOOR_AREA_STARTING': 107.0, 'FLOOR_HEIGHT_STARTING': 1.87,
-            'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'mechanical, extract only',
-            'SECONDHEAT_DESCRIPTION_ENDING': 'None', 'ENERGY_TARIFF_ENDING': 'Single',
-            'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 0.0,
-            'GLAZED_TYPE_ENDING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_ENDING': 100.0,
-            'LOW_ENERGY_LIGHTING_ENDING': 43.0, 'NUMBER_OPEN_FIREPLACES_ENDING': 0.0, 'EXTENSION_COUNT_ENDING': 1.0,
-            'TOTAL_FLOOR_AREA_ENDING': 107.0, 'FLOOR_HEIGHT_ENDING': 1.87, 'DAYS_TO_STARTING': 3221,
-            'DAYS_TO_ENDING': 2874, 'walls_thermal_transmittance': 2.0, 'is_cavity_wall': False,
-            'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False,
-            'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_sandstone_or_limestone': True,
-            'is_park_home': False, 'walls_insulation_thickness': 'none', 'external_insulation': False,
-            'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 2.0, 'is_park_home_ENDING': False,
-            'walls_insulation_thickness_ENDING': 'none', 'external_insulation_ENDING': False,
-            'internal_insulation_ENDING': False, 'floor_thermal_transmittance': 0.62, 'is_to_unheated_space': False,
-            'is_to_external_air': False, 'is_suspended': True, 'is_solid': False, 'another_property_below': False,
-            'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.62,
-            'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 2.3, 'is_pitched': True,
-            'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
-            'has_dwelling_above': False, 'roof_insulation_thickness': 'none', 'roof_thermal_transmittance_ENDING': 2.3,
-            'roof_insulation_thickness_ENDING': 'none', 'heater_type': 'Unknown', 'system_type': 'from main system',
-            'thermostat_characteristics': 'Unknown', 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown',
-            'hotwater_tariff_type': 'Unknown', 'extra_features': 'Unknown', 'chp_systems': 'Unknown',
-            'distribution_system': 'Unknown', 'no_system_present': 'Unknown', 'appliance': 'Unknown',
-            'heater_type_ENDING': 'Unknown', 'system_type_ENDING': 'from main system',
-            'thermostat_characteristics_ENDING': 'Unknown', 'heating_scope_ENDING': 'Unknown',
-            'energy_recovery_ENDING': 'Unknown', 'hotwater_tariff_type_ENDING': 'Unknown',
-            'extra_features_ENDING': 'Unknown', 'chp_systems_ENDING': 'Unknown',
-            'distribution_system_ENDING': 'Unknown', 'no_system_present_ENDING': 'Unknown',
-            'appliance_ENDING': 'Unknown', 'has_radiators': True, 'has_fan_coil_units': False,
-            'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
-            'has_pipes_in_concrete_slab': False, 'has_boiler': True, 'has_air_source_heat_pump': False,
-            'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False,
-            'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
-            'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
-            'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
-            'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
-            'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
-            'has_mains_gas': True, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
-            'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
-            'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
-            'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
-            'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
-            'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
-            'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
-            'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
-            'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
-            'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
-            'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
-            'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
-            'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
-            'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
-            'has_electric_ENDING': False, 'has_mains_gas_ENDING': True, 'has_wood_logs_ENDING': False,
-            'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
-            'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
-            'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
-            'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
-            'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'Unknown', 'charging_system': 'Unknown',
-            'switch_system': 'programmer', 'no_control': 'Unknown', 'dhw_control': 'Unknown',
-            'community_heating': 'Unknown', 'multiple_room_thermostats': False, 'auxiliary_systems': 'bypass',
-            'trvs': 'trvs', 'rate_control': 'Unknown', 'thermostatic_control_ENDING': 'Unknown',
-            'charging_system_ENDING': 'Unknown', 'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown',
-            'dhw_control_ENDING': 'Unknown', 'community_heating_ENDING': 'Unknown',
-            'multiple_room_thermostats_ENDING': False, 'auxiliary_systems_ENDING': 'bypass', 'trvs_ENDING': 'trvs',
-            'rate_control_ENDING': 'Unknown', 'glazing_type': 'double', 'glazing_type_ENDING': 'double',
-            'fuel_type': 'mains gas', 'main-fuel_tariff_type': 'Unknown', 'is_community': False,
-            'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
-            'fuel_type_ENDING': 'mains gas', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
-            'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
-            'estimated_perimeter_STARTING': 41.634120622393354, 'estimated_perimeter_ENDING': 41.634120622393354,
-            'HOT_WATER_ENERGY_EFF_STARTING': "Good",
-            "FLOOR_ENERGY_EFF_STARTING": "Unknown",
-            "WINDOWS_ENERGY_EFF_STARTING": "Average",
-            "WALLS_ENERGY_EFF_STARTING": "Very Poor",
-            "SHEATING_ENERGY_EFF_STARTING": "Unknown",
-            "ROOF_ENERGY_EFF_STARTING": "Very Poor",
-            "MAINHEAT_ENERGY_EFF_STARTING": "Good",
-            "MAINHEATC_ENERGY_EFF_STARTING": "Average",
-            "LIGHTING_ENERGY_EFF_STARTING": "Average",
-            "POTENTIAL_ENERGY_EFFICIENCY": 80,
-            "ENVIRONMENT_IMPACT_POTENTIAL": 75,
-            "ENERGY_CONSUMPTION_POTENTIAL": 152,
-            "CO2_EMISSIONS_POTENTIAL": 2.9,
-            'HOT_WATER_ENERGY_EFF_ENDING': "Good",
-            "FLOOR_ENERGY_EFF_ENDING": "Unknown",
-            "WINDOWS_ENERGY_EFF_ENDING": "Average",
-            "WALLS_ENERGY_EFF_ENDING": "Very Poor",
-            "SHEATING_ENERGY_EFF_ENDING": "Unknown",
-            "ROOF_ENERGY_EFF_ENDING": "Very Poor",
-            "MAINHEAT_ENERGY_EFF_ENDING": "Good",
-            "MAINHEATC_ENERGY_EFF_ENDING": "Average",
-            "LIGHTING_ENERGY_EFF_ENDING": "Average",
-        }
-
-        home3 = Property(
-            id=0,
-            postcode=starting_epc3["postcode"],
-            address1=starting_epc3["address1"],
-            epc_client=EpcClient(auth_token="notoken"),
-            data=starting_epc3
-        )
-        home3.get_components(cleaned)
-
-        data_processor3 = DataProcessor(None, newdata=True)
-        data_processor3.insert_data(pd.DataFrame([home3.get_model_data()]))
-
-        data_processor3.pre_process()
-
-        starting_epc_data3 = data_processor3.get_component_features(suffix="_STARTING")
-        ending_epc_data3 = data_processor3.get_component_features(suffix="_ENDING")
-        fixed_data3 = data_processor3.get_fixed_features()
-
-        ending_lodgement_date3 = '2022-06-14'
-
-        ending_epc_data3["DAYS_TO_ENDING"] = data_processor3.calculate_days_to(ending_lodgement_date3)
-
-        recommendation3 = {
-            "recommendation_id": 0,
-            "type": "mechanical_ventilation"
-        }
-
-        test_record3 = create_recommendation_scoring_data(
-            property=home3,
-            recommendation=recommendation3,
-            starting_epc_data=starting_epc_data3,
-            ending_epc_data=ending_epc_data3,
-            fixed_data=fixed_data3,
-        )
-        test_record3 = pd.DataFrame([test_record3])
-
-        # Test the final cleaning:
-        test_record3 = DataProcessor.apply_averages_cleaning(
-            data_to_clean=test_record3,
-            cleaning_data=cleaning_data,
-            cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
-        ).drop(columns=["LOCAL_AUTHORITY"])
-
-        test_record3 = DataProcessor.clean_missings_after_description_process(
-            test_record3, [
-                c for c in test_record3.columns if
-                ("thermal_transmittance" in c) or ("insulation_thickness" in c)
-            ]
-        )
-
-        for c in test_record3.columns:
-            if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
-                continue
-
-            assert test_record3[c].values[0] == row3[c]
-
-    def test_fireplaces(self, cleaned, cleaning_data):
-
-        starting_epc4 = {
-            'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill',
-            'uprn-source': 'Energy Assessor', 'floor-height': '2.4', 'heating-cost-potential': '501',
-            'unheated-corridor-length': '', 'hot-water-cost-potential': '70',
-            'construction-age-band': 'England and Wales: 1930-1949', 'potential-energy-rating': 'C',
-            'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
-            'environment-impact-potential': '76', 'glazed-type': 'double glazing, unknown install date',
-            'heating-cost-current': '723', 'address3': '',
-            'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A',
-            'property-type': 'House', 'local-authority-label': 'Melton',
-            'fixed-lighting-outlets-count': '14', 'energy-tariff': 'dual',
-            'mechanical-ventilation': 'natural', 'hot-water-cost-current': '98',
-            'county': 'Leicestershire', 'postcode': 'LE14 3QT', 'solar-water-heating-flag': 'N',
-            'constituency': 'E14000909', 'co2-emissions-potential': '2.4', 'number-heated-rooms': '5',
-            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '153',
-            'local-authority': 'E07000133', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '1',
-            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
-            'inspection-date': '2022-06-27', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '46',
-            'address1': '9 Glebe Road', 'heat-loss-corridor': '', 'flat-storey-count': '',
-            'constituency-label': 'Rutland and Melton', 'roof-energy-eff': 'Good',
-            'total-floor-area': '87.0', 'building-reference-number': '10002396876',
-            'environment-impact-current': '60', 'co2-emissions-current': '4.0',
-            'roof-description': 'Pitched, 200 mm loft insulation', 'floor-energy-eff': 'N/A',
-            'number-habitable-rooms': '5', 'address2': 'Asfordby Hill', 'hot-water-env-eff': 'Good',
-            'posttown': 'MELTON MOWBRAY', 'mainheatc-energy-eff': 'Average',
-            'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Average',
-            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
-            'lighting-description': 'Low energy lighting in 29% of fixed outlets', 'roof-env-eff': 'Good',
-            'walls-energy-eff': 'Very Poor', 'photo-supply': '15.0', 'lighting-cost-potential': '79',
-            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
-            'lodgement-datetime': '2022-06-27 15:28:18', 'flat-top-storey': '',
-            'current-energy-rating': 'D',
-            'secondheat-description': 'Room heaters, dual fuel (mineral and wood)',
-            'walls-env-eff': 'Very Poor', 'transaction-type': 'ECO assessment', 'uprn': '100030539619',
-            'current-energy-efficiency': '66', 'energy-consumption-current': '256',
-            'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '135',
-            'lodgement-date': '2022-06-27', 'extension-count': '1', 'mainheatc-env-eff': 'Average',
-            'lmk-key': '736b6f4803a11d9e45b49bf98f36eb8a7f357b0dd24f3e7cddef5295518e5bef',
-            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
-            'potential-energy-efficiency': '78', 'hot-water-energy-eff': 'Good',
-            'low-energy-lighting': '29',
-            'walls-description': 'Solid brick, as built, no insulation (assumed)',
-            'hotwater-description': 'From main system'
-        }
-
-        row4 = {
-            'UPRN': '100030539619', 'RDSAP_CHANGE': 7, 'HEAT_DEMAND_CHANGE': -41, 'CARBON_CHANGE': -0.5,
-            'SAP_STARTING': 66, 'SAP_ENDING': 73, 'HEAT_DEMAND_STARTING': 256, 'HEAT_DEMAND_ENDING': 215,
-            'CARBON_STARTING': 4.0, 'CARBON_ENDING': 3.5, 'PROPERTY_TYPE': 'House', 'BUILT_FORM': 'Semi-Detached',
-            'CONSTITUENCY': 'E14000909', 'NUMBER_HABITABLE_ROOMS': 5.0, 'NUMBER_HEATED_ROOMS': 5.0,
-            'FIXED_LIGHTING_OUTLETS_COUNT': 14.0, 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1930-1949',
-            'TRANSACTION_TYPE_STARTING': 'eco assessment', 'MECHANICAL_VENTILATION_STARTING': 'natural',
-            'SECONDHEAT_DESCRIPTION_STARTING': 'Room heaters, dual fuel (mineral and wood)',
-            'ENERGY_TARIFF_STARTING': 'dual', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'N', 'PHOTO_SUPPLY_STARTING': 15.0,
-            'GLAZED_TYPE_STARTING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_STARTING': 100.0,
-            'LOW_ENERGY_LIGHTING_STARTING': 29.0, 'NUMBER_OPEN_FIREPLACES_STARTING': 1.0,
-            'EXTENSION_COUNT_STARTING': 1.0, 'TOTAL_FLOOR_AREA_STARTING': 87.0, 'FLOOR_HEIGHT_STARTING': 2.4,
-            'TRANSACTION_TYPE_ENDING': 'eco assessment', 'MECHANICAL_VENTILATION_ENDING': 'natural',
-            'SECONDHEAT_DESCRIPTION_ENDING': 'Room heaters, dual fuel (mineral and wood)',
-            'ENERGY_TARIFF_ENDING': 'dual', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 15.0,
-            'GLAZED_TYPE_ENDING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_ENDING': 100.0,
-            'LOW_ENERGY_LIGHTING_ENDING': 29.0, 'NUMBER_OPEN_FIREPLACES_ENDING': 0, 'EXTENSION_COUNT_ENDING': 1.0,
-            'TOTAL_FLOOR_AREA_ENDING': 87.0, 'FLOOR_HEIGHT_ENDING': 2.4, 'DAYS_TO_STARTING': 2887,
-            'DAYS_TO_ENDING': 2960, 'walls_thermal_transmittance': 1.7, 'is_cavity_wall': False,
-            'is_filled_cavity': False, 'is_solid_brick': True, 'is_system_built': False, 'is_timber_frame': False,
-            'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_sandstone_or_limestone': False,
-            'is_park_home': False, 'walls_insulation_thickness': 'none', 'external_insulation': False,
-            'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 1.7, 'is_park_home_ENDING': False,
-            'walls_insulation_thickness_ENDING': 'none', 'external_insulation_ENDING': False,
-            'internal_insulation_ENDING': False, 'floor_thermal_transmittance': 0.66, 'is_to_unheated_space': False,
-            'is_to_external_air': False, 'is_suspended': False, 'is_solid': True, 'another_property_below': False,
-            'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.66,
-            'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 0.21, 'is_pitched': True,
-            'is_roof_room': False, 'is_loft': True, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
-            'has_dwelling_above': False, 'roof_insulation_thickness': '200', 'roof_thermal_transmittance_ENDING': 0.21,
-            'roof_insulation_thickness_ENDING': '200', 'heater_type': 'Unknown', 'system_type': 'from main system',
-            'thermostat_characteristics': 'Unknown', 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown',
-            'hotwater_tariff_type': 'Unknown', 'extra_features': 'Unknown', 'chp_systems': 'Unknown',
-            'distribution_system': 'Unknown', 'no_system_present': 'Unknown', 'appliance': 'Unknown',
-            'heater_type_ENDING': 'Unknown', 'system_type_ENDING': 'from main system',
-            'thermostat_characteristics_ENDING': 'Unknown', 'heating_scope_ENDING': 'Unknown',
-            'energy_recovery_ENDING': 'Unknown', 'hotwater_tariff_type_ENDING': 'Unknown',
-            'extra_features_ENDING': 'Unknown', 'chp_systems_ENDING': 'Unknown',
-            'distribution_system_ENDING': 'Unknown', 'no_system_present_ENDING': 'Unknown',
-            'appliance_ENDING': 'Unknown', 'has_radiators': True, 'has_fan_coil_units': False,
-            'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
-            'has_pipes_in_concrete_slab': False, 'has_boiler': True, 'has_air_source_heat_pump': False,
-            'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False,
-            'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
-            'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
-            'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
-            'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
-            'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
-            'has_mains_gas': True, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
-            'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
-            'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
-            'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
-            'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
-            'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
-            'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
-            'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
-            'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
-            'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
-            'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
-            'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
-            'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
-            'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
-            'has_electric_ENDING': False, 'has_mains_gas_ENDING': True, 'has_wood_logs_ENDING': False,
-            'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
-            'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
-            'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
-            'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
-            'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'room thermostat',
-            'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
-            'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
-            'auxiliary_systems': 'Unknown', 'trvs': 'Unknown', 'rate_control': 'Unknown',
-            'thermostatic_control_ENDING': 'room thermostat', 'charging_system_ENDING': 'Unknown',
-            'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
-            'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
-            'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'Unknown', 'rate_control_ENDING': 'Unknown',
-            'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'mains gas',
-            'main-fuel_tariff_type': 'Unknown', 'is_community': False,
-            'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
-            'fuel_type_ENDING': 'mains gas', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
-            'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
-            'estimated_perimeter_STARTING': 37.54197650630557, 'estimated_perimeter_ENDING': 37.54197650630557,
-            'HOT_WATER_ENERGY_EFF_STARTING': "Good",
-            "FLOOR_ENERGY_EFF_STARTING": "Unknown",
-            "WINDOWS_ENERGY_EFF_STARTING": "Average",
-            "WALLS_ENERGY_EFF_STARTING": "Very Poor",
-            "SHEATING_ENERGY_EFF_STARTING": "Unknown",
-            "ROOF_ENERGY_EFF_STARTING": "Good",
-            "MAINHEAT_ENERGY_EFF_STARTING": "Good",
-            "MAINHEATC_ENERGY_EFF_STARTING": "Average",
-            "LIGHTING_ENERGY_EFF_STARTING": "Average",
-            "POTENTIAL_ENERGY_EFFICIENCY": 78,
-            "ENVIRONMENT_IMPACT_POTENTIAL": 76,
-            "ENERGY_CONSUMPTION_POTENTIAL": 153,
-            "CO2_EMISSIONS_POTENTIAL": 2.4,
-            'HOT_WATER_ENERGY_EFF_ENDING': "Good",
-            "FLOOR_ENERGY_EFF_ENDING": "Unknown",
-            "WINDOWS_ENERGY_EFF_ENDING": "Average",
-            "WALLS_ENERGY_EFF_ENDING": "Very Poor",
-            "SHEATING_ENERGY_EFF_ENDING": "Unknown",
-            "ROOF_ENERGY_EFF_ENDING": "Good",
-            "MAINHEAT_ENERGY_EFF_ENDING": "Good",
-            "MAINHEATC_ENERGY_EFF_ENDING": "Average",
-            "LIGHTING_ENERGY_EFF_ENDING": "Average",
-        }
-
-        home4 = Property(
-            id=0,
-            postcode=starting_epc4["postcode"],
-            address1=starting_epc4["address1"],
-            epc_client=EpcClient(auth_token="notoken"),
-            data=starting_epc4
-        )
-        home4.get_components(cleaned)
-
-        data_processor4 = DataProcessor(None, newdata=True)
-        data_processor4.insert_data(pd.DataFrame([home4.get_model_data()]))
-
-        data_processor4.pre_process()
-
-        starting_epc_data4 = data_processor4.get_component_features(suffix="_STARTING")
-        ending_epc_data4 = data_processor4.get_component_features(suffix="_ENDING")
-        fixed_data4 = data_processor4.get_fixed_features()
-
-        ending_lodgement_date4 = '2022-09-08'
-
-        ending_epc_data4["DAYS_TO_ENDING"] = data_processor4.calculate_days_to(ending_lodgement_date4)
-
-        recommendation4 = {
-            "recommendation_id": 0,
-            "type": "sealing_open_fireplace"
-        }
-
-        test_record4 = create_recommendation_scoring_data(
-            property=home4,
-            recommendation=recommendation4,
-            starting_epc_data=starting_epc_data4,
-            ending_epc_data=ending_epc_data4,
-            fixed_data=fixed_data4,
-        )
-        test_record4 = pd.DataFrame([test_record4])
-
-        # Test the final cleaning:
-        test_record4 = DataProcessor.apply_averages_cleaning(
-            data_to_clean=test_record4,
-            cleaning_data=cleaning_data,
-            cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
-        ).drop(columns=["LOCAL_AUTHORITY"])
-
-        test_record4 = DataProcessor.clean_missings_after_description_process(
-            test_record4, [
-                c for c in test_record4.columns if
-                ("thermal_transmittance" in c) or ("insulation_thickness" in c)
-            ]
-        )
-
-        for c in test_record4.columns:
-            if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
-                continue
-
-            assert test_record4[c].values[0] == row4[c]
--- a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
+++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
@ -0,0 +1,78 @@
+import pandas as pd
+from tqdm import tqdm
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from etl.epc.settings import EARLIEST_EPC_DATE
+
+logger = setup_logger()
+
+
+class AirSourceHeatPumpEfficiency:
+    """
+    Counts how heating systems that contain an air source heat pump are described and rated in the EPC
+    certificates.
+    """
+
+    # Heating and hot water fields whose combinations are counted
+    SUMMARY_COLUMNS = [
+        "MAINHEAT_DESCRIPTION",
+        "MAINHEAT_ENERGY_EFF",
+        "MAINHEATCONT_DESCRIPTION",
+        "MAINHEATC_ENERGY_EFF",
+        "MAIN_FUEL",
+        "HOTWATER_DESCRIPTION",
+        "HOT_WATER_ENERGY_EFF",
+        "MAINS_GAS_FLAG"
+    ]
+    # A certificate that is missing any of these fields is ignored
+    REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]
+
+    def __init__(self, file_directories, cleaned_lookup):
+        """
+        :param file_directories: A list of directories (path objects supporting the `/` operator), each holding a
+            certificates.csv file.
+        :param cleaned_lookup: A dictionary containing cleaned lookup data.
+        """
+        self.file_directories = file_directories
+        self.cleaned_lookup = cleaned_lookup
+
+        # One dataframe of combination counts per processed directory, filled by create_dataset
+        self.results = []
+
+    def create_dataset(self):
+        """
+        For every directory, count how often each combination of SUMMARY_COLUMNS occurs on certificates whose main
+        heating description mentions an air source heat pump, and append the counts to self.results.
+
+        Certificates are only considered when they have a UPRN, were lodged after EARLIEST_EPC_DATE, have a known
+        tenure and have every REQUIRED_COLUMNS field populated. Only the newest certificate per UPRN is counted.
+        :return:
+        """
+        logger.info("Creating air source heat pump efficiency dataset")
+        for directory in tqdm(self.file_directories):
+            df = pd.read_csv(directory / "certificates.csv", low_memory=False)
+            df = df[~pd.isnull(df["UPRN"])]
+            df["UPRN"] = df["UPRN"].astype(int).astype(str)
+            # Take entries after SAP12
+            df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"])
+            df = df[df["LODGEMENT_DATE"] > EARLIEST_EPC_DATE]
+
+            df = df[
+                ~df["TENURE"].isin(
+                    [
+                        "unknown",
+                        "Not defined - use in the case of a new dwelling for which the intended tenure in not known. "
+                        "It is not to be used for an existing dwelling"
+                    ]
+                )
+            ]
+
+            # Take entries that contain an air source heat pump
+            df = df[
+                df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)
+            ]
+
+            # The remaining filters rely on columns outside SUMMARY_COLUMNS, so they have to run while the full
+            # certificate is still available (running them after narrowing the frame raises a KeyError).
+            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
+            df = df.dropna(subset=self.REQUIRED_COLUMNS)
+            # Take newest LODGEMENT_DATE per UPRN
+            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
+
+            # Count each combination of the columns we're interested in and keep the result for this directory
+            counts = df.groupby(self.SUMMARY_COLUMNS).size().reset_index(name="count")
+            self.results.append(counts)
--- a/etl/air_source_heat_pump/app.py
+++ b/etl/air_source_heat_pump/app.py
@ -0,0 +1,24 @@
+from pathlib import Path
+from backend.app.plan.utils import get_cleaned
+from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency
+
+# Location of the EPC download, resolved relative to this file; app() processes each of its sub-directories
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    Read in the EPC dataset and look at the efficiency values for heating systems that include air source heat
+    pumps. The resulting dataset is used to inform the air source heat pump recommendations, so we know how to
+    set the simulation.
+    :return:
+    """
+    # Every sub-directory of the download folder is handed to the dataset builder
+    certificate_directories = [path for path in DATA_DIRECTORY.iterdir() if path.is_dir()]
+
+    AirSourceHeatPumpEfficiency(
+        file_directories=certificate_directories,
+        cleaned_lookup=get_cleaned()
+    ).create_dataset()
--- a/etl/costs/app.py
+++ b/etl/costs/app.py
@ -73,6 +73,9 @@ def app():
    suspended_floor_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="suspended_floor_insulation", header=0)
    solid_floor_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="solid_floor_insulation", header=0)
    ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0)
+    lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
+    flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
+    window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)

    # Form a single table to be uploaded
    costs = pd.concat(
@ -83,6 +86,8 @@ def app():
            suspended_floor_costs,
            solid_floor_costs,
            ewi_costs,
+            lel_costs,
+            flat_roof_costs
        ]
    )

--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@ -0,0 +1,211 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+# User and portfolio the asset list belongs to; together they form the S3 key "<USER_ID>/<PORTFOLIO_ID>/inputs.csv"
+USER_ID = 8
+PORTFOLIO_ID = 67
+
+# Fixed UPRN selections, one list per archetype. app() keeps only the properties whose UPRN appears in one of
+# these lists, so the portfolio does not change between runs.
+# Archetype 1 (20 UPRNs)
+archetype_1_uprns = [100020604138, 200001188299, 100020578756, 200001187196, 200001192253, 100020581792, 200001188304,
+                     100020625813, 100020618060, 100020585305, 100020617489, 100020615039, 100020618076, 100020588913,
+                     200001187197, 100020671205, 100020576940, 100020619814, 100020576472, 100020618083]
+# Archetype 2 (46 UPRNs)
+archetype_2_uprns = [100020698027, 10001007455, 100020653785, 10090383198, 100020665632, 100020620659, 100020615603,
+                     100020609610, 100020625597, 100020665656, 100020665640, 100020587905, 100020665630, 100020624351,
+                     100020625451, 100020624348, 100020666735, 100020653786, 100020576458, 100020657902, 100020624350,
+                     100020637405, 100020666734, 100020616325, 100020666716, 100020653783, 100020665645, 100020642337,
+                     100020665638, 100022904981, 100020688226, 100020630285, 100020626800, 100020665634, 100022907528,
+                     100020665652, 100020624347, 100020666721, 100020585002, 10014055968, 10001008257, 100020621438,
+                     100020576459, 100020665643, 100020665654, 100022917303]
+# Archetype 3 (23 UPRNs)
+archetype_3_uprns = [100020577523, 100020616446, 100020605342, 100020594652, 100020585394, 100020601138, 100020597485,
+                     100020614883, 100020633162, 100020697787, 200001185785, 100020646842, 100020581449, 100020595611,
+                     100020641814, 100020575611, 100020652986, 100020654671, 100020647336, 100020610518, 100020607980,
+                     100020692380, 100020581690]
+# Archetype 4 (13 UPRNs)
+archetype_4_uprns = [100020650603, 100020582907, 100020605116, 100020650607, 100020589325, 100020655500, 100020642537,
+                     200001187539, 100020631683, 100020610165, 100020596436, 100020598277, 100020660228]
+
+
+def _archetype_asset_list(epc_data, archetype, property_type, walls_description, ratings=None,
+                          roof_descriptions=None):
+    """
+    Select the properties that match one archetype and return them as an asset list.
+
+    :param epc_data: Dataframe of EPC certificates to select from.
+    :param archetype: Label written to the ARCHETYPE column of the result.
+    :param property_type: Required PROPERTY_TYPE value.
+    :param walls_description: Required WALLS_DESCRIPTION value.
+    :param ratings: Accepted CURRENT_ENERGY_RATING values, or None to accept every rating.
+    :param roof_descriptions: Accepted ROOF_DESCRIPTION values, or None to accept every roof.
+    :return: A new dataframe with the columns UPRN, ADDRESS1, POSTCODE and ARCHETYPE.
+    """
+    mask = epc_data["PROPERTY_TYPE"].isin([property_type]) & epc_data["WALLS_DESCRIPTION"].isin([walls_description])
+    if ratings is not None:
+        mask = mask & epc_data["CURRENT_ENERGY_RATING"].isin(ratings)
+    if roof_descriptions is not None:
+        mask = mask & epc_data["ROOF_DESCRIPTION"].isin(roof_descriptions)
+
+    asset_list = epc_data.loc[mask, ["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+    asset_list["ARCHETYPE"] = archetype
+    return asset_list
+
+
+def app():
+    """
+    We shall define a small portfolio of properties, based in Croydon.
+
+    The newest certificate per UPRN is taken from the Croydon EPC download and narrowed to social properties below
+    EPC C that were lodged in the last 3 years. Four archetypes are selected from these, the result is restricted
+    to the hard-coded archetype_*_uprns lists, uploaded to S3 as the portfolio's inputs.csv, and the matching
+    request body is printed.
+    :return:
+    """
+
+    # Firstly, read in the EPC data for Croydon
+    epc_data = pd.read_csv(
+        "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
+        low_memory=False
+    )
+
+    # Filter on entries where we have a UPRN
+    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+
+    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
+
+    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
+
+    # Now filter on social properties
+    epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]
+    # There are 17337 properties with a registered EPC in Croydon
+    # Take below EPC C properties
+    epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]
+    # 7994 properties are below EPC C (46%)
+    # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties
+
+    # For the purpose of the sample, take the properties have surveys done in the last 3 years
+    # This gives us 1351 remaining properties
+    three_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(3 * 365))
+    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= three_years_ago]
+
+    unfilled_cavity = "Cavity wall, as built, no insulation (assumed)"
+
+    # Archetype 1: defined below:
+    # 1) House
+    # 2) Unfilled cavity
+    # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
+    # 4) EPC E or D
+    # 24 properties
+    archetype_1_sample_asset_list = _archetype_asset_list(
+        epc_data,
+        archetype="Archetype 1",
+        property_type="House",
+        walls_description=unfilled_cavity,
+        ratings=["D", "E"],
+        roof_descriptions=[
+            "Pitched, 12 mm loft insulation",
+            "Pitched, 0 mm loft insulation",
+            "Pitched, no insulation",
+            "Pitched, 50 mm loft insulation",
+            "Flat, no insulation (assumed)",
+            "Pitched, no insulation (assumed)"
+        ]
+    )
+
+    # Archetype 2: defined below:
+    # 1) Flat
+    # 2) Unfilled cavity
+    # 3) Another property above
+    # 4) EPC E or D
+    # 57 properties here
+    archetype_2_sample_asset_list = _archetype_asset_list(
+        epc_data,
+        archetype="Archetype 2",
+        property_type="Flat",
+        walls_description=unfilled_cavity,
+        ratings=["E", "D"],
+        roof_descriptions=["(another dwelling above)"]
+    )
+
+    # Archetype 3: defined below:
+    # 1) EPC E or below
+    # 2) Solid brick wall
+    # 3) House
+    # 4) Pitched roof with no, limited or 100 mm insulation
+    # The original note recorded just 7 properties here (more expensive to retrofit)
+    archetype_3_sample_asset_list = _archetype_asset_list(
+        epc_data,
+        archetype="Archetype 3",
+        property_type="House",
+        walls_description="Solid brick, as built, no insulation (assumed)",
+        ratings=["E", "F", "G"],
+        roof_descriptions=[
+            "Pitched, no insulation",
+            "Pitched, limited insulation (assumed)",
+            "Pitched, 100 mm loft insulation",
+            "Pitched, no insulation (assumed)",
+        ]
+    )
+
+    # Archetype 4: defined below:
+    # 1) Maisonette
+    # 2) Empty cavity
+    # 3) No rating or roof filter beyond the below EPC C filter applied above
+    # 16 properties here
+    archetype_4_sample_asset_list = _archetype_asset_list(
+        epc_data,
+        archetype="Archetype 4",
+        property_type="Maisonette",
+        walls_description=unfilled_cavity
+    )
+
+    asset_list = pd.concat(
+        [
+            archetype_1_sample_asset_list,
+            archetype_2_sample_asset_list,
+            archetype_3_sample_asset_list,
+            archetype_4_sample_asset_list
+        ]
+    )
+
+    asset_list = asset_list.rename(
+        columns={
+            "UPRN": "uprn",
+            "ADDRESS1": "address",
+            "POSTCODE": "postcode",
+            "ARCHETYPE": "archetype"
+        }
+    )
+
+    asset_list["uprn"] = asset_list["uprn"].astype(int)
+
+    # We end up with some properties that are currently an EPC C, but we do not have this data in the download, so we
+    # manually remove
+    # 1) 3 Reid Close, CR5 3BL
+    # 2) Flat 6, Collier Court 2A, St. Peters Road CR0 1HD
+    asset_list = asset_list[
+        ~asset_list["uprn"].isin(
+            [
+                100020576460,
+                100020624352,
+            ]
+        )
+    ]
+    # We have slightly too many properties, so each archetype was cut down to a random sample (20, 46, 23 and 13
+    # properties for archetypes 1 to 4). The sampled UPRNs are hard-coded in the module-level archetype_*_uprns
+    # lists, and only those properties are kept.
+    uprns_to_keep = archetype_1_uprns + archetype_2_uprns + archetype_3_uprns + archetype_4_uprns
+    asset_list = asset_list[asset_list["uprn"].isin(uprns_to_keep)]
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Request body that goes with the uploaded file; it is only printed here, not sent anywhere
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "budget": None,
+        "exclusions": ["floor_insulation"]
+    }
+    print(body)
--- a/etl/customers/gla_croydon_demo/slides.py
+++ b/etl/customers/gla_croydon_demo/slides.py
@ -0,0 +1,760 @@
+"""
+This script contains the code to generate the data required to populate the slides.
+We connect to the database and extract the data for the portfolio needed, so it is recommended to use
+an environment akin to the backend to run this script.
+"""
+import pandas as pd
+import numpy as np
+from backend.app.db.connection import db_engine
+from sqlalchemy.orm import sessionmaker
+from utils.s3 import read_csv_from_s3
+from etl.customers.slide_utils import (
+    plot_epc_distribution,
+    get_property_details_by_portfolio_id,
+    get_plan_by_portfolio_id,
+    get_properties_with_default_recommendations,
+    create_powerpoint,
+    create_recommendations_summary
+)
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+
+# USER_ID is the first path component of the S3 key the asset list is read from ("{USER_ID}/67/inputs.csv").
+USER_ID = 8
+# Portfolio analysed by scenario_1() and scenario_2() respectively.
+PORTFOLIO_ID_1 = 67
+PORTFOLIO_ID_2 = 68
+# NOTE(review): the EPC targets, SAP_TARGET_2 and CUSTOMER_KEY are not referenced in the part of the file reviewed
+# here; presumably they are used further down - confirm before removing.
+EPC_TARGET_1 = "C"
+EPC_TARGET_2 = "A"
+# SAP_TARGET_1 is the target passed to create_recommendations_summary().
+SAP_TARGET_1 = 69
+SAP_TARGET_2 = 100
+CUSTOMER_KEY = "gla-demo"
+
+# Sample UPRNS
+# Static per-archetype UPRN samples, stored as strings. scenario_2() concatenates the four lists and keeps only the
+# assets/properties whose UPRN (cast to str) is in them. The list lengths (13, 30, 15, 9) match the sample sizes
+# used in make_sample().
+archetype_1_sample = ['100020604138', '200001192253', '100020581792', '100020576940', '200001187196', '100020618060',
+                      '100020625813', '100020578756', '100020618076', '200001187197', '100020619814', '100020617489',
+                      '100020588913']
+
+archetype_2_sample = ['100020585002', '100020615603', '100020665652', '100020626800', '100020624347', '100020624348',
+                      '100020576459', '10001007455', '100020666716', '100020609610', '100020625451', '100020625597',
+                      '100020624351', '100020665634', '100020624350', '100020665640', '100020665632', '100022917303',
+                      '100020665656', '10014055968', '100020630285', '100020665638', '100020616325', '100020637405',
+                      '100020698027', '100020657902', '100020688226', '100020653786', '100020642337', '100020665643']
+
+archetype_3_sample = ['100020594652', '100020697787', '100020577523', '100020633162', '100020601138', '100020595611',
+                      '100020597485', '100020614883', '100020605342', '100020654671', '100020575611', '100020607980',
+                      '200001185785', '100020616446', '100020692380']
+
+archetype_4_sample = ['100020596436', '100020610165', '200001187539', '100020655500', '100020582907', '100020598277',
+                      '100020650607', '100020605116', '100020650603']
+
+
+# Columns of the recommendations summary that are reported per archetype.
+_SUMMARY_COLUMNS = [
+    "total_cost", "total_carbon", "total_bill_savings", "total_sap_points", "adjusted_heat_demand",
+    "energy_percent_change", "carbon_percent_change", "bills_percent_change",
+]
+
+# Recommendation types grouped into the measure categories counted for the slides.
+_MEASURE_GROUPS = {
+    "wall_insulation": ["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"],
+    "ventilation": ["mechanical_ventilation"],
+    "roof_insulation": ["loft_insulation", "flat_roof_insulation"],
+    "floor_insulation": ["solid_floor_insulation", "suspended_floor_insulation"],
+    "windows": ["windows_glazing"],
+    "heating": ["heating"],
+    "heating_controls": ["heating_control"],
+    "solar": ["solar_pv"],
+}
+
+_ARCHETYPES = ("Archetype 1", "Archetype 2", "Archetype 3", "Archetype 4")
+
+
+def _count_measures(recommendations_df):
+    """Count recommendations per measure group; types not listed in any group are counted under "other"."""
+    counts = recommendations_df.groupby("type")["id"].count()
+    grouped = {
+        group: int(counts[counts.index.isin(measure_types)].sum())
+        for group, measure_types in _MEASURE_GROUPS.items()
+    }
+    known_types = [name for measure_types in _MEASURE_GROUPS.values() for name in measure_types]
+    grouped["other"] = int(counts[~counts.index.isin(known_types)].sum())
+    return grouped
+
+
+def _summarise_archetype(archetype, asset_list, recommendations_summary, recommendations_df, property_details_df):
+    """Build the min/max/mean/total figures, CO2 share, measure counts and slide texts for one archetype."""
+    # UPRNs are compared as strings on both sides so an int/str dtype difference between the asset list and the
+    # database cannot silently produce an empty selection.
+    uprns = asset_list.loc[asset_list["archetype"] == archetype, "uprn"].astype(str).values
+
+    # Restrict to the reported columns before aggregating so non-numeric columns never reach mean()/min()/max().
+    summary = recommendations_summary.loc[
+        recommendations_summary["uprn"].astype(str).isin(uprns), _SUMMARY_COLUMNS
+    ]
+    details = property_details_df[property_details_df["uprn"].astype(str).isin(uprns)]
+    measures = (
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(uprns)]
+        .groupby("type")["id"].count().reset_index()
+    )
+
+    minimums = summary.min()
+    maximums = summary.max()
+    means = summary.mean()
+
+    # Each text reads "<mean rounded to 2 d.p.>: <min> - <max>".
+    texts = {
+        column: f"{round(means[column], 2)}: {minimums[column]} - {maximums[column]}"
+        for column in _SUMMARY_COLUMNS
+    }
+
+    return {
+        "min": minimums,
+        "max": maximums,
+        "mean": means,
+        "totals": summary.sum(),
+        # Share of the whole portfolio's CO2 emissions that comes from this archetype.
+        "co2_share": details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum(),
+        "measures": measures,
+        "texts": texts,
+    }
+
+
+def scenario_1():
+    """
+    Gather the figures needed for the scenario 1 slides (portfolio PORTFOLIO_ID_1, target SAP_TARGET_1).
+
+    Reads the asset list from S3 and the properties, property details, plans and recommendations from the
+    database, then derives portfolio-wide totals/means, measure counts and a per-archetype summary.
+
+    :return: dict with the keys
+        "overview_totals" / "overview_means": numeric column sums / means of the recommendations summary,
+        "measures": recommendation counts per measure group (see _MEASURE_GROUPS, plus "other"),
+        "archetypes": mapping of archetype name to the dict built by _summarise_archetype,
+        "recommendations_summary", "recommendations", "property_details", "plans": the underlying DataFrames.
+    """
+    # Connect to database; the session is closed in the finally block whatever happens below.
+    session = sessionmaker(bind=db_engine)()
+    try:
+        portfolio_id = PORTFOLIO_ID_1
+
+        # Get the asset list
+        asset_list = pd.DataFrame(
+            read_csv_from_s3("retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv")
+        )
+
+        # Get the properties for the portfolio
+        properties_df = pd.DataFrame(get_properties_with_default_recommendations(session, portfolio_id))
+        uprn_lookup = properties_df[["uprn", "id"]].rename(columns={"id": "property_id"})
+
+        # Property details, with bills estimated from the adjusted_energy_consumption and the uprn merged on
+        property_details_df = pd.DataFrame(get_property_details_by_portfolio_id(session, portfolio_id))
+        property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
+            lambda x: AnnualBillSavings.calculate_annual_bill(x)
+        )
+        property_details_df = property_details_df.merge(uprn_lookup, on="property_id")
+
+        plans_df = pd.DataFrame(get_plan_by_portfolio_id(session, portfolio_id))
+
+        # Unnest the recommendations. Each property holds a list of recommendation dictionaries; properties
+        # without recommendations explode to a null entry, which is dropped.
+        recommendations_exploded = properties_df["recommendations"].explode().tolist()
+        recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+        recommendations_df = recommendations_df.merge(uprn_lookup, how="left", on="property_id")
+
+        recommendations_summary = create_recommendations_summary(
+            recommendations_df,
+            properties_df,
+            property_details_df,
+            SAP_TARGET_1
+        )
+
+        # Relative changes of energy, CO2 and bills. These are plain ratios (not multiplied by 100).
+        recommendations_summary["carbon_percent_change"] = (
+            recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
+        )
+        recommendations_summary["energy_percent_change"] = (
+            recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
+        )
+        recommendations_summary["bills_percent_change"] = (
+            recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
+        )
+
+        archetypes = {
+            archetype: _summarise_archetype(
+                archetype, asset_list, recommendations_summary, recommendations_df, property_details_df
+            )
+            for archetype in _ARCHETYPES
+        }
+
+        return {
+            # numeric_only keeps identifier/text columns out of the portfolio-wide aggregates.
+            "overview_totals": recommendations_summary.sum(numeric_only=True),
+            "overview_means": recommendations_summary.mean(numeric_only=True),
+            "measures": _count_measures(recommendations_df),
+            "archetypes": archetypes,
+            "recommendations_summary": recommendations_summary,
+            "recommendations": recommendations_df,
+            "property_details": property_details_df,
+            "plans": plans_df,
+        }
+    finally:
+        session.close()
+
+
+def make_sample(random_state=None):
+    """
+    Draw a random sample of UPRNs for each archetype from the scenario 1 asset list.
+
+    The sample sizes are fixed per archetype (13, 30, 15 and 9).
+    NOTE(review): the sizes were chosen by hand (an earlier note mentions a proportion of 67 / 102); the result
+    is presumably meant to be pasted into the module-level archetype_*_sample lists so the UPRNs stay static.
+
+    :param random_state: Optional seed forwarded to DataFrame.sample so the draw can be reproduced.
+    :return: dict mapping archetype name to the list of sampled UPRNs.
+    """
+    # Get the asset list
+    asset_list = pd.DataFrame(
+        read_csv_from_s3("retrofit-plan-inputs-dev", f"{USER_ID}/{PORTFOLIO_ID_1}/inputs.csv")
+    )
+
+    sample_sizes = {
+        "Archetype 1": 13,
+        "Archetype 2": 30,
+        "Archetype 3": 15,
+        "Archetype 4": 9,
+    }
+
+    # DataFrame.sample raises ValueError if an archetype has fewer rows than its requested sample size.
+    return {
+        archetype: asset_list[asset_list["archetype"] == archetype]
+        .sample(size, random_state=random_state)["uprn"]
+        .to_list()
+        for archetype, size in sample_sizes.items()
+    }
+
+
+def scenario_2():
+    # Connect to database
+    session = sessionmaker(bind=db_engine)()
+
+    ########################################################################
+    # Get the data we need
+    ########################################################################
+
+    portfolio_id = PORTFOLIO_ID_2
+
+    # Get the asset list
+    asset_list = read_csv_from_s3(
+        "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
+    )
+    asset_list = pd.DataFrame(asset_list)
+
+    sample_uprns = archetype_1_sample + archetype_2_sample + archetype_3_sample + archetype_4_sample
+
+    # Filter on sample uprns
+    asset_list = asset_list[asset_list["uprn"].astype(str).isin(sample_uprns)]
+
+    # Get the properties for the portfolio
+    properties = get_properties_with_default_recommendations(session, portfolio_id)
+    properties_df = pd.DataFrame(properties)
+    properties_df = properties_df[properties_df["uprn"].astype(str).isin(sample_uprns)]
+
+    # We now pull the data for the property details
+    property_details = get_property_details_by_portfolio_id(session, portfolio_id)
+    property_details_df = pd.DataFrame(property_details)
+    property_details_df = property_details_df[property_details_df["property_id"].isin(properties_df["id"].values)]
+    # We estimate bills based on the adjusted_energy_consumption
+    property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
+        lambda x: AnnualBillSavings.calculate_annual_bill(x)
+    )
+    # Merge on uprn
+    property_details_df = property_details_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
+
+    plans = get_plan_by_portfolio_id(session, portfolio_id)
+    plans_df = pd.DataFrame(plans)
+
+    # Unnest the recommendations. Each recommendation is a list of dictionaries
+    recommendations_exploded = properties_df["recommendations"].explode().tolist()
+    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+    # Add uprn on
+    recommendations_df = recommendations_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        how="left",
+        on="property_id"
+    )
+
+    recommendations_summary = create_recommendations_summary(
+        recommendations_df,
+        properties_df,
+        property_details_df,
+        SAP_TARGET_1
+    )
+
+    # Calculate % changes of energ, co2 and abs
+    recommendations_summary["carbon_percent_change"] = (
+        recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
+    )
+
+    recommendations_summary["energy_percent_change"] = (
+        recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
+    )
+
+    recommendations_summary["bills_percent_change"] = (
+        recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
+    )
+
+    ########################
+    # Overview
+    ########################
+    overview_totals = recommendations_summary.sum()
+    overview_means = recommendations_summary.mean()
+
+    ########################
+    # Measures
+    ########################
+    measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
+    wall_insulation_measures = measures_count[
+        measures_count["type"].isin(["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"])
+    ]["id"].sum()
+    ventilation_measures = measures_count[
+        measures_count["type"].isin(["mechanical_ventilation"])
+    ]["id"].sum()
+    roof_insulation_measures = measures_count[
+        measures_count["type"].isin(["loft_insulation", "flat_roof_insulation"])
+    ]["id"].sum()
+    floor_insulation_measures = measures_count[
+        measures_count["type"].isin(["solid_floor_insulation", "suspended_floor_insulation"])
+    ]["id"].sum()
+    windows = measures_count[
+        measures_count["type"].isin(["windows_glazing"])
+    ]["id"].sum()
+    heating = measures_count[
+        measures_count["type"].isin(["heating"])
+    ]["id"].sum()
+    heating_controls = measures_count[
+        measures_count["type"].isin(["heating_control"])
+    ]["id"].sum()
+    solar = measures_count[
+        measures_count["type"].isin(["solar_pv"])
+    ]["id"].sum()
+    other = measures_count[
+        ~measures_count["type"].isin([
+            "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation",
+            "loft_insulation", "flat_roof_insulation", "solid_floor_insulation",
+            "suspended_floor_insulation", "windows_glazing", "heating", "heating_control", "solar_pv",
+            "mechanical_ventilation"
+        ])
+    ]["id"].sum()
+
+    z = recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]
+
+    recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]["type"].value_counts()
+
+    # Summary information by each archetype
+    ########################
+    # Archetype 1
+    ########################
+    archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
+    recommendations_arch_1_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+
+    arch_1_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+    ]
+    arch_1_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
+    # Take the mean, median and maximum of each value
+    arch_1_recommendation_min = recommendations_arch_1_summary.min()
+    arch_1_recommendation_max = recommendations_arch_1_summary.max()
+    arch_1_recommendation_means = recommendations_arch_1_summary.mean()
+
+    arch_1_totals = recommendations_arch_1_summary.sum()
+
+    annual_total_co2 = recommendations_arch_1_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_1_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_1_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_1_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_1_recommendation_min['total_cost']} - {arch_1_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_1_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_1_recommendation_min['total_sap_points']} - {arch_1_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_1_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_1_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_1_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_1_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['energy_percent_change']} - "
+                           f"{arch_1_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_1_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_1_recommendation_min['total_carbon']} - {arch_1_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_1_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_1_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_1_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_1_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_1_recommendation_min['total_bill_savings']} - "
+                 f"{arch_1_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_1_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_1_recommendation_min['bills_percent_change']} - "
+                         f"{arch_1_recommendation_max['bills_percent_change']}")
+
+    ########################
+    # Archetype 2
+    ########################
+    archetype_2 = asset_list[asset_list["archetype"] == "Archetype 2"]
+    recommendations_arch_2_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+
+    arch_2_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_2["uprn"].values)
+    ]
+    arch_2_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
+    # Take the mean, median and maximum of each value
+    arch_2_recommendation_min = recommendations_arch_2_summary.min()
+    arch_2_recommendation_max = recommendations_arch_2_summary.max()
+    arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
+
+    total_cost = recommendations_arch_2_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_2_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_2_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_2_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_2["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_2_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_2_recommendation_min['total_cost']} - {arch_2_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_2_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_2_recommendation_min['total_sap_points']} - {arch_2_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_2_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_2_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_2_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_2_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['energy_percent_change']} - "
+                           f"{arch_2_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_2_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_2_recommendation_min['total_carbon']} - {arch_2_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_2_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_2_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_2_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_2_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_2_recommendation_min['total_bill_savings']} - "
+                 f"{arch_2_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_2_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_2_recommendation_min['bills_percent_change']} - "
+                         f"{arch_2_recommendation_max['bills_percent_change']}")
+
+    ########################
+    # Archetype 3
+    ########################
+    archetype_3 = asset_list[asset_list["archetype"] == "Archetype 3"]
+    recommendations_arch_3_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+
+    arch_3_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_3["uprn"].values)
+    ]
+    arch_3_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
+    # Take the minimum, maximum and mean of each value
+    arch_3_recommendation_min = recommendations_arch_3_summary.min()
+    arch_3_recommendation_max = recommendations_arch_3_summary.max()
+    arch_3_recommendation_means = recommendations_arch_3_summary.mean()
+
+    total_cost = recommendations_arch_3_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_3_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_3_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_3_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_3_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_3_recommendation_min['total_cost']} - {arch_3_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_3_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_3_recommendation_min['total_sap_points']} - {arch_3_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_3_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_3_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_3_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_3_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['energy_percent_change']} - "
+                           f"{arch_3_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_3_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_3_recommendation_min['total_carbon']} - {arch_3_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_3_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_3_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_3_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_3_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_3_recommendation_min['total_bill_savings']} - "
+                 f"{arch_3_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_3_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_3_recommendation_min['bills_percent_change']} - "
+                         f"{arch_3_recommendation_max['bills_percent_change']}")
+
+    ########################
+    # Archetype 4
+    ########################
+    archetype_4 = asset_list[asset_list["archetype"] == "Archetype 4"]
+    recommendations_arch_4_summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+
+    arch_4_property_details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+    ]
+    arch_4_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
+    # Take the minimum, maximum and mean of each value
+    arch_4_recommendation_min = recommendations_arch_4_summary.min()
+    arch_4_recommendation_max = recommendations_arch_4_summary.max()
+    arch_4_recommendation_means = recommendations_arch_4_summary.mean()
+
+    total_cost = recommendations_arch_4_summary["total_cost"].sum()
+    annual_total_co2 = recommendations_arch_4_summary["total_carbon"].sum()
+    annual_total_bills = recommendations_arch_4_summary["total_bill_savings"].sum()
+    annual_total_energy_savings = recommendations_arch_4_summary["adjusted_heat_demand"].sum()
+    archetype_measures = \
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_4["uprn"].values)].groupby("type")[
+            "id"].count().reset_index()
+
+    cost_text = (f"{round(arch_4_recommendation_means['total_cost'], 2)}: "
+                 f"{arch_4_recommendation_min['total_cost']} - {arch_4_recommendation_max['total_cost']}")
+
+    sap_text = (f"{round(arch_4_recommendation_means['total_sap_points'], 2)}: "
+                f"{arch_4_recommendation_min['total_sap_points']} - {arch_4_recommendation_max['total_sap_points']}")
+
+    energy_text = (f"{round(arch_4_recommendation_means['adjusted_heat_demand'], 2)}: "
+                   f"{arch_4_recommendation_min['adjusted_heat_demand']} - "
+                   f"{arch_4_recommendation_max['adjusted_heat_demand']}")
+
+    energy_percent_text = (f"{round(arch_4_recommendation_means['energy_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['energy_percent_change']} - "
+                           f"{arch_4_recommendation_max['energy_percent_change']}")
+
+    carbon_text = (f"{round(arch_4_recommendation_means['total_carbon'], 2)}: "
+                   f"{arch_4_recommendation_min['total_carbon']} - {arch_4_recommendation_max['total_carbon']}")
+
+    carbon_percent_text = (f"{round(arch_4_recommendation_means['carbon_percent_change'], 2)}: "
+                           f"{arch_4_recommendation_min['carbon_percent_change']} - "
+                           f"{arch_4_recommendation_max['carbon_percent_change']}")
+
+    bill_text = (f"{round(arch_4_recommendation_means['total_bill_savings'], 2)}: "
+                 f"{arch_4_recommendation_min['total_bill_savings']} - "
+                 f"{arch_4_recommendation_max['total_bill_savings']}")
+
+    bill_percent_text = (f"{round(arch_4_recommendation_means['bills_percent_change'], 2)}: "
+                         f"{arch_4_recommendation_min['bills_percent_change']} - "
+                         f"{arch_4_recommendation_max['bills_percent_change']}")
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@ -0,0 +1,129 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+# User and portfolio identifiers; app() uses them to build the S3 object keys and request bodies.
+USER_ID = 8
+PORTFOLIO_ID = 70
+
+# Council tax band of each pilot property, one row per (address, postcode) pair.
+council_tax_bands = pd.DataFrame(
+    [
+        ("8 Corporation Road", "DY2 7PX", "A"),
+        ("21 Wells Road", "DY5 3TB", "A"),
+        ("27 Milton Road", "WV14 8HZ", "A"),
+        ("195 Ashenhurst Road", "DY1 2JB", "A"),
+        ("53 Bromley", "DY5 4PJ", "A"),
+        ("91 Osprey Drive", "DY1 2JS", "B"),
+        ("47 Fairfield Road", "DY8 5UJ", "B"),
+        ("150 Huntingtree Road", "B63 4HP", "C"),
+        ("6 Beech Road", "DY1 4BP", "A"),
+        ("5 Oaklands", "B62 0JA", "A"),
+    ],
+    columns=["address", "postcode", "band"],
+)
+
+# Values that must override what is held on the EPC itself, for instance when a new survey has been
+# conducted and its results have not reached the API yet.
+patches = [
+    {
+        "address": "6 Beech Road",
+        "postcode": "DY1 4BP",
+        "walls-description": "Cavity wall, filled cavity",
+        "walls-energy-eff": "Good",
+        "roof-description": "Pitched, 12 mm loft insulation",
+        "roof-energy-eff": "Very Poor",
+        "windows-description": "Fully double glazed",
+        "windows-energy-eff": "Good",
+        "mainheat-description": "Room heaters, electric",
+        "mainheat-energy-eff": "Very Poor",
+        "mainheatcont-description": "Appliance thermostats",
+        "mainheatc-energy-eff": "Good",
+        "lighting-description": "Low energy lighting in 25% of fixed outlets",
+        "lighting-energy-eff": "Good",
+        "floor-description": "Solid, no insulation (assumed)",
+        "secondheat-description": "None",
+        "current-energy-efficiency": "32",
+        "energy-consumption-current": "491",
+        "co2-emissions-current": "5.0",
+        "potential-energy-efficiency": "87",
+    },
+]
+
+# Measures that the non-invasive surveys found to be installed already. To reflect this in the front end
+# the measure stays in the recommendation, but its cost is removed and a message is shown saying that the
+# measure is already installed.
+already_installed = [
+    {
+        "address": "5 Oaklands",
+        "postcode": "B62 0JA",
+        "already_installed": ["windows_glazing"],
+    },
+]
+
+
+def app():
+    """
+    Build the pilot asset list, store it in S3 together with the already-installed measures and the EPC
+    patches, and print one request body per EPC goal (C and B).
+    """
+    inputs_bucket = "retrofit-plan-inputs-dev"
+
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Immo/IMMO Sample Assets_Dudley.xlsx",
+        header_row=0
+    ).drop(columns=["Unnamed: 0"])
+
+    # "Full Address" is comma separated: the first part is used as the address, the last as the postcode
+    address_parts = raw_asset_list["Full Address"].str.split(",")
+    raw_asset_list["address"] = address_parts.str[0]
+    raw_asset_list["postcode"] = address_parts.str[-1].str.strip()
+
+    # Attach the council tax bands and give the bedroom / bathroom counts their internal names
+    column_names = {
+        "No. of Beds": "n_bedrooms",
+        "No. of WC's": "n_bathrooms"
+    }
+    asset_list = raw_asset_list.merge(
+        council_tax_bands, how="left", on=["address", "postcode"]
+    ).rename(columns=column_names)
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+
+    # Store the asset list, the already-installed overrides and the patches in s3.
+    # NOTE(review): the last two keys end in ".json" but are written with save_csv_to_s3 - confirm that
+    # the consumer of these files expects CSV content.
+    uploads = (
+        (filename, asset_list),
+        (already_installed_filename, pd.DataFrame(already_installed)),
+        (patches_filename, pd.DataFrame(patches)),
+    )
+    for file_name, dataframe in uploads:
+        save_csv_to_s3(
+            dataframe=dataframe,
+            bucket_name=inputs_bucket,
+            file_name=file_name
+        )
+
+    # One request body per goal: the EPC C portfolio uses PORTFOLIO_ID, the EPC B portfolio PORTFOLIO_ID + 1
+    for portfolio_offset, goal_value in enumerate(("C", "B")):
+        body = {
+            "portfolio_id": str(PORTFOLIO_ID + portfolio_offset),
+            "housing_type": "Private",
+            "goal": "Increase EPC",
+            "goal_value": goal_value,
+            "trigger_file_path": filename,
+            "already_installed_file_path": already_installed_filename,
+            "patches_file_path": patches_filename,
+            "budget": None,
+        }
+        print(body)
--- a/etl/customers/immo/pilot/non_invasive.py
+++ b/etl/customers/immo/pilot/non_invasive.py
@ -0,0 +1,210 @@
+# import extract_msg
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes
+
+
+def parse_msg_body(text):
+    """
+    Turn the body of a survey email into a dictionary of its non-empty lines.
+
+    Asterisks are removed from every line, surrounding whitespace is stripped and ' = ' is rewritten as
+    ': '. Each line that is still non-empty is stored under a sequential key ("Info1", "Info2", ...).
+
+    :param text: The message body. May be None or empty, in which case an empty dictionary is returned.
+    :return: A dictionary mapping "Info<n>" keys to the cleaned lines, in their original order.
+    """
+    data = {}
+    if not text:
+        return data
+
+    # splitlines() handles "\r\n" as well as bare "\n" or "\r" line endings; splitting on "\r\n" alone
+    # turned a body that used any other line ending into a single entry
+    for line in text.splitlines():
+        # Remove all asterisks and extra whitespace
+        clean_line = line.replace('*', '').strip()
+        if not clean_line:
+            continue
+
+        # Present "key = value" lines as "key: value"; lines without ' = ' are left untouched
+        clean_line = clean_line.replace(' = ', ': ')
+
+        # Every kept line gets the next sequential key
+        data[f"Info{len(data) + 1}"] = clean_line
+
+    return data
+
+
+def app():
+    """
+    Upload the manually transcribed non-invasive survey notes of the pilot properties.
+
+    The notes are passed to upload_non_intrusive_survey_notes together with a session bound to db_engine.
+    The session is closed once the upload has finished, whether or not it succeeded.
+    :return:
+    """
+
+    # The commented-out code below was used to read the raw survey emails that the notes were taken from.
+    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/5 Oaklands B62 "
+    #             "0JA/Immo - 5 Oaklands Halesowen B62 0JA.msg")
+    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/6 Beech Rd DY1 "
+    #             "4BP/IMMO - 6 Beech Road Dudley DY1 4BP.msg")
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/8 Corporation Rd DY2 "
+    #     "7PX/IMMO - 8 Corporation Road Dudley DY2 7PX.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/21 Wells Rd DY5 3TB/"
+    #     "IMMO - 21 Wells Road Brierley Hill DY5 3TB.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 "
+    #     "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/91 Osprey Drive DY1 "
+    #     "2JS/IMMO - 91 Osprey Drive Dudley DY1 2JS.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
+    #     "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO "
+    #     "- 27 Milton Road Coseley Bilston WV14 8HZ.msg"
+    # )
+    #
+    # with extract_msg.Message(filepath) as msg:
+    #     body = msg.body
+    #
+    # from pprint import pprint
+    # pprint(parse_msg_body(body))
+
+    # Values shared by every survey of the pilot
+    surveyor = 'Carl Fitzgerald - The Warmfront Team'
+    survey_date = datetime.strptime('2024-04-11', '%Y-%m-%d')
+    # Wall insulation finding that is repeated word for word on most properties
+    retro_drilled_cavity_note = ('Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                                 'CIGA check and extracting the cavity, replacing with bead insulation.')
+
+    # We manually create the non-invasive notes for the pilot
+    non_invasive_notes = [
+        {
+            'uprn': 90028499,
+            # 'address': '5 Oaklands',
+            # 'postcode': 'B62 0JA',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Wall Insulation': f'{retro_drilled_cavity_note} '
+                               'There is a shared alleyway with the neighbour, that is a solid brick wall.',
+            'Wall Render': 'Partial render between top of ground floor window and bottom of 1st floor window',
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: North East, Back house direction: South West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90055152,
+            # 'address': '6 Beech Road',
+            # 'postcode': 'DY1 4BP',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, '
+                               'retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Side house direction: North East',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90070461,
+            # 'address': '8 Corporation Road',
+            # 'postcode': 'DY2 7PX',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Wall Insulation': "External wall insulation",
+            'Wall Render': "Render finish throughout",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: North East, Back house direction: South West',
+            'Access to mains?': None,
+        },
+        {
+            'uprn': 90022227,
+            # 'address': '21 Wells Road',
+            # 'postcode': 'DY5 3TB',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Wall Insulation': retro_drilled_cavity_note,
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: East, Back house direction: West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90077535,
+            # 'address': '47 Fairfield Road',
+            # 'postcode': 'DY8 5UJ',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Wall Insulation': retro_drilled_cavity_note,
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: East, Back house direction: West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90060989,
+            # 'address': '53 Bromley',
+            # 'postcode': 'DY5 4PJ',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - "
+                               "recommends a cavity wall fill",
+            "Roof": "Hipped roof",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': "Front house direction: North, Back house direction: South, Side house direction: West",
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90048026,
+            # 'address': '91 Osprey Drive',
+            # 'postcode': 'DY1 2JS',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Wall Insulation': retro_drilled_cavity_note,
+            'Wall Render': 'Tile hung front and rear of property',
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Side house direction: East',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90093693,
+            # 'address': '150 Huntingtree Road',
+            # 'postcode': 'B63 4HP',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Heating': 'Electric (storage heaters)',
+            'Wall Insulation': retro_drilled_cavity_note,
+            "Roof": "Hipped roof",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': "Front house direction: North West, Back house direction: South East, Side house direction: "
+                           "North East",
+        },
+        {
+            'uprn': 90051858,
+            # 'address': '195 Ashenhurst Road',
+            # 'postcode': 'DY1 2JB',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Wall Insulation': retro_drilled_cavity_note,
+            'Wall Render': "Solid render front and rear of property",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: South, Back house direction: North',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90106884,
+            # 'address': '27 Milton Road',
+            # 'postcode': 'WV14 8HZ',
+            'surveyor': surveyor,
+            'survey_date': survey_date,
+            'Wall Insulation': retro_drilled_cavity_note,
+            'Wall Render': "Solid render front and rear of property",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: South East, Back house direction: North West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+    ]
+
+    session = sessionmaker(bind=db_engine)()
+    try:
+        upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)
+    finally:
+        # Release the database connection even if the upload raises
+        session.close()
--- a/etl/customers/immo/pilot/requirements.txt
+++ b/etl/customers/immo/pilot/requirements.txt
@ -0,0 +1 @@
+extract-msg
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@ -0,0 +1,293 @@
+from pptx.enum.text import PP_ALIGN  # NOQA
+from pptx import Presentation
+from pptx.util import Inches, Pt
+import matplotlib.pyplot as plt
+from sqlalchemy.orm import Session
+from sqlalchemy.sql import true
+from backend.app.db.utils import row2dict
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+from backend.app.db.models.recommendations import Recommendation
+from backend.app.db.models.recommendations import Plan
+from backend.app.utils import sap_to_epc
+
+# Hex colour used to draw each EPC rating, keyed by the rating letter.
+EPC_COLOURS = dict(
+    A="#028051",
+    B="#14b759",
+    C="#8ecd46",
+    D="#fdd401",
+    E="#fdab67",
+    F="#ee8023",
+    G="#e71437",
+)
+
+
+def get_properties_with_default_recommendations(session: Session, portfolio_id: int):
+    """
+    Fetch every property of a portfolio together with its default recommendations.
+
+    Properties are outer-joined to their recommendations, so a property that has no recommendation with
+    default set to True is still returned, with an empty 'recommendations' list.
+
+    :param session: The SQLAlchemy session used to execute the query.
+    :param portfolio_id: The ID of the portfolio for which to retrieve properties and recommendations.
+    :return: A list of dictionaries, one per property, each carrying a 'recommendations' list with the
+             property's default recommendations (if any).
+    """
+    # Restricting to default recommendations inside the join condition (rather than in a filter) keeps
+    # the properties that have none
+    default_recommendation_join = (Recommendation.property_id == PropertyModel.id) & (
+        Recommendation.default == true())
+    rows = (
+        session.query(PropertyModel, Recommendation)
+        .outerjoin(Recommendation, default_recommendation_join)
+        .filter(PropertyModel.portfolio_id == portfolio_id)
+        .all()
+    )
+
+    properties_by_id = {}
+    for property_row, recommendation_row in rows:
+        # A property appears once per joined recommendation; only its first row creates the entry
+        if property_row.id not in properties_by_id:
+            entry = row2dict(property_row)
+            properties_by_id[property_row.id] = entry
+            properties_by_id[property_row.id]['recommendations'] = []
+
+        # recommendation_row is None when the outer join found no default recommendation
+        if recommendation_row and recommendation_row.default:
+            properties_by_id[property_row.id]['recommendations'].append(row2dict(recommendation_row))
+
+    return list(properties_by_id.values())
+
+
+def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
+    """
+    Retrieve the EPC property details of every property in a portfolio.
+
+    :param session: The SQLAlchemy session used to execute the query.
+    :param portfolio_id: The ID of the portfolio for which to retrieve property details.
+    :return: A list of dictionaries, one per property details row; an empty list if there are none.
+    """
+    details_query = session.query(PropertyDetailsEpcModel).filter(
+        PropertyDetailsEpcModel.portfolio_id == portfolio_id)
+    detail_rows = details_query.all()
+    if not detail_rows:
+        return []
+
+    # Convert the SQLAlchemy objects to dictionaries
+    return [row2dict(detail_row) for detail_row in detail_rows]
+
+
+def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
+    """
+    Retrieve every plan that belongs to a portfolio.
+
+    :param session: The SQLAlchemy session used to execute the query.
+    :param portfolio_id: The ID of the portfolio for which to retrieve plans.
+    :return: A list of dictionaries, one per plan; an empty list if the portfolio has no plans.
+    """
+    plan_rows = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all()
+    if not plan_rows:
+        return []
+
+    # Convert the SQLAlchemy objects to dictionaries
+    return [row2dict(plan_row) for plan_row in plan_rows]
+
+
+def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
+    """
+    Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes,
+    shows it, and saves it as an image under the customer's directory.
+
+    The input DataFrame is not modified: percentages are rounded on a copy.
+
+    :param df: DataFrame with columns ['current_epc_rating', 'count', 'percentage']
+    :param customer_key: Name of the customer directory; the image is written to
+                         'etl/customers/<customer_key>/epc_distribution_plot.png'
+    :param title: Title of the plot
+    :param background_color: Background color of the plot
+    :param bar_height: Thickness of the bars (default 0.4)
+    :param font_size: Base font size for text annotations (default 15)
+    :return: A tuple of the (already closed) figure and the path the image was saved to
+    """
+    # Calculate dynamic figure size or adjust based on preferences
+    square_size = max(6, len(df) * 0.6)  # Ensure minimum size and adjust based on number of entries
+    fig, ax = plt.subplots(figsize=(square_size, square_size))
+    fig.patch.set_facecolor(background_color)  # Set figure background color
+    ax.set_facecolor(background_color)  # Set axes background color
+
+    # Round the percentages to 1 decimal place on a copy, so the caller's DataFrame is left untouched
+    df_sorted = df.assign(percentage=df['percentage'].round(1)).sort_values('percentage', ascending=True)
+
+    # Plot bars with specified height for adjustable thickness
+    bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
+                   color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)
+
+    epc_rating_font_size = font_size * 2  # EPC rating font size larger than base font size
+    count_percentage_font_size = font_size  # Count (percentage) font size as base font size
+
+    # Annotate bars with EPC ratings inside and count with percentage values outside
+    for index, bar in enumerate(bars):
+        width = bar.get_width()
+        vertical_centre = bar.get_y() + bar.get_height() / 2
+        row = df_sorted.iloc[index]  # Bars are drawn in the order of df_sorted
+
+        # EPC rating inside the bar with increased font size
+        ax.text(width - (width * 0.05), vertical_centre,
+                f"{row['current_epc_rating']}", va='center', ha='right', color='white',
+                fontsize=epc_rating_font_size)
+
+        # Count and percentage outside the bar, original font size
+        ax.text(width + 1, vertical_centre,
+                f"{row['count']} ({row['percentage']}%)", va='center', color='black',
+                fontsize=count_percentage_font_size)
+
+    ax.set_title(title, fontsize=font_size * 1.2)  # Adjust title font size proportionally
+    ax.tick_params(axis='x', which='both', bottom=False, top=False,
+                   labelbottom=False)  # Remove x-axis tick marks and values
+    ax.tick_params(axis='y', which='both', left=False, right=False,
+                   labelleft=False)  # Remove y-axis tick marks and labels
+    for side in ('top', 'right', 'left', 'bottom'):
+        ax.spines[side].set_visible(False)  # Remove every spine
+
+    plt.tight_layout()  # Adjust layout
+    plt.show()
+
+    # Save the figure as an image
+    figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
+    fig.savefig(figure_path, bbox_inches='tight')
+    plt.close(fig)  # Close the figure to free memory
+
+    return fig, figure_path
+
+
+def save_plot_to_image(figure, path='plot.png'):
+    """
+    Saves a matplotlib figure to an image file for insertion into PowerPoint, then closes the figure.
+
+    :param figure: The matplotlib figure to save.
+    :param path: Where to write the image (default 'plot.png').
+    """
+    # bbox_inches='tight' trims the surrounding whitespace from the saved image
+    figure.savefig(path, bbox_inches='tight')
+    plt.close(figure)
+
+
+def save_figure_as_image(figure, filename='temp_plot.png'):
+    """
+    Saves a matplotlib figure to an image file at 300 dpi, then closes the figure.
+
+    :param figure: The matplotlib figure to save.
+    :param filename: Where to write the image (default 'temp_plot.png').
+    """
+    figure.savefig(filename, dpi=300)
+    plt.close(figure)  # Close the figure to prevent it from displaying in notebooks or Python environments
+
+
+def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8),
+                                height_inches=Inches(2)):
+    """
+    Adds a text box to a slide and fills it with the commentary, one paragraph per line.
+
+    :param slide: The slide object to add the commentary to.
+    :param commentary: The commentary text; each newline-separated section becomes its own paragraph.
+    :param top_inches: The top position of the commentary text box.
+    :param left_inches: The left position of the commentary text box.
+    :param width_inches: The width of the commentary text box.
+    :param height_inches: The height of the commentary text box.
+    """
+    text_frame = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches).text_frame
+
+    # Configure text frame
+    text_frame.word_wrap = True
+    # NOTE(review): python-pptx documents auto_size as an MSO_AUTO_SIZE member or None - confirm that the
+    # installed version accepts True here.
+    text_frame.auto_size = True
+    text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT
+
+    for position, section in enumerate(commentary.split("\n")):
+        # The text frame already holds one paragraph: use it for the first section, add one for the rest
+        paragraph = text_frame.paragraphs[0] if position == 0 else text_frame.add_paragraph()
+        paragraph.text = section
+        paragraph.space_after = Pt(14)  # Adjust space after each bullet point as needed
+        paragraph.font.size = Pt(14)  # Adjust font size as needed
+        paragraph.level = 0  # Bullet level, can be adjusted for nested bullets
+        paragraph.space_before = Pt(0)
+
+
+def add_slide_with_image(prs, title, img_path=None, commentary=None):
+    """
+    Appends a titled slide to the presentation, optionally with a picture and commentary.
+
+    When an image path is given, the picture is placed below the title and any commentary is placed underneath
+    it. Without an image, the commentary is placed higher up the slide.
+
+    :param prs: The presentation to add the slide to.
+    :param title: The text for the slide title.
+    :param img_path: Optional path to the image to place on the slide.
+    :param commentary: Optional commentary text, passed on to add_commentary_with_bullets.
+    """
+    # NOTE(review): layout index 5 is presumably "Title Only" in python-pptx's default template rather than
+    # "Title and Content" - confirm against the template in use
+    layout = prs.slide_layouts[5]
+    new_slide = prs.slides.add_slide(layout)
+    new_slide.shapes.title.text = title
+
+    if img_path:
+        new_slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
+
+    if not commentary:
+        return
+
+    # The commentary sits below the picture when there is one, otherwise nearer the middle of the slide
+    commentary_top = Inches(6) if img_path else Inches(3)
+    add_commentary_with_bullets(new_slide, commentary, commentary_top)
+
+
+def create_powerpoint(data, save_location):
+    """
+    Builds a PowerPoint presentation with one slide per entry in ``data`` and saves it.
+
+    :param data: A dictionary mapping a slide key to a dictionary of slide content. The keys read from each entry
+        are 'title' (defaults to an empty string), 'image_path' and 'text'; the last two are optional.
+    :param save_location: The file path where the PowerPoint presentation will be saved.
+    """
+    presentation = Presentation()
+
+    # Only the slide content is needed; the slide keys themselves are not used
+    for content in data.values():
+        add_slide_with_image(
+            presentation,
+            content.get('title', ""),
+            content.get('image_path'),
+            content.get('text'),
+        )
+
+    presentation.save(save_location)
+
+
+def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
+    """
+    Summarises the recommendations for each property and compares the expected SAP score against a target.
+
+    :param recommendations_df: One row per recommendation, with a "property_id" column plus the columns that are
+        summed below.
+    :param properties_df: One row per property, with "id", "uprn" and "current_sap_points" columns.
+    :param property_details_df: Optional property details keyed on "uprn". When None, the current CO2, energy
+        and energy bill columns are not added.
+    :param sap_target: The SAP score that the expected SAP points are compared against.
+    :return: A DataFrame with one row per property that has recommendations.
+    """
+    # Total impact of all of the recommendations for a property
+    totals = {
+        "total_sap_points": ("sap_points", "sum"),
+        "total_valuation_impact": ("property_valuation_increase", "sum"),
+        "total_bill_savings": ("energy_cost_savings", "sum"),
+        "total_cost": ("estimated_cost", "sum"),
+        "total_carbon": ("co2_equivalent_savings", "sum"),
+        "adjusted_heat_demand": ("adjusted_heat_demand", "sum"),
+    }
+    summary = recommendations_df.groupby(["property_id"]).agg(**totals).reset_index()
+
+    # Bring in the uprn and the current SAP points for each property
+    current_sap = properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"})
+    summary = summary.merge(current_sap, on="property_id", how="left")
+
+    summary["expected_sap_points"] = summary["current_sap_points"] + summary["total_sap_points"]
+    summary["expected_epc_rating"] = summary["expected_sap_points"].apply(sap_to_epc)
+    # Positive values mean the property is still expected to fall short of the target
+    summary["sap_difference"] = sap_target - summary["expected_sap_points"]
+
+    if property_details_df is None:
+        return summary
+
+    detail_columns = ["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]
+    current_details = property_details_df[detail_columns].rename(
+        columns={
+            "id": "property_id",
+            "co2_emissions": "current_co2",
+            "adjusted_energy_consumption": "current_energy",
+            "energy_bill": "current_energy_bill"
+        }
+    )
+    return summary.merge(current_details, on="uprn", how="left")
--- a/etl/customers/urban_splash/asset_list.py
+++ b/etl/customers/urban_splash/asset_list.py
@ -0,0 +1,195 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+# Read in the .env file in backend
+load_dotenv(dotenv_path="backend/.env")
+# Token passed to SearchEpc when looking up the EPC for each property
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+# Identifiers used to build the S3 keys for the plan inputs ({USER_ID}/{portfolio id}/test_inputs.csv) and the
+# request bodies that are printed for each portfolio
+USER_ID = 8
+PORTFOLIO_ID = 66
+SECOND_SCENARIO_PORTFOLIO_ID = 65
+
+# We also create a second portfolio for a subset of properties that do not meet the install requirements
+# We drop these uprns from the first plan
+second_portfolio_uprns = [
+    10070056840, 10070056846, 10070056847, 10070056843, 10070056848, 10070056844, 10070056849,
+    10070056829, 10070056920, 10023345463
+]
+
+
+def app():
+    """
+    This application will read in the Urban Splash data, in the dev AWS account, and pre-process it. There are a
+    few issues with the file, including incorrect postcodes.
+
+    The customer is interested in the following:
+    - Getting properties to an EPC C
+    - Doing so within a budget of £5,000
+
+    The processed asset list is split in two and written to S3: properties whose uprn is in
+    second_portfolio_uprns are written for the second scenario portfolio and all other properties are written for
+    the main portfolio. The request body for each portfolio is printed.
+
+    :return: None
+    :raises LookupError: If no EPC can be found for a property under any of the candidate postcodes.
+    """
+
+    potential_postcodes = ["BD9 5BQ", "BD9 5BR", "BD9 5BN"]
+
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/urban_splash/raw_asset_list/USRF - Velvet Mill EPC.xlsx",
+        header_row=2
+    )
+
+    # We have a series of apartment numbers that are "Apartment 001", "Apartment 002", etc. We need to convert these
+    # to "Apartment 1", "Apartment 2", etc.
+    raw_asset_list["address1"] = raw_asset_list["Unit Number"].str.replace(
+        "Apartment 00", "Apartment ", regex=True
+    )
+    raw_asset_list["address1"] = raw_asset_list["address1"].str.replace(
+        "Apartment 0", "Apartment ", regex=True
+    )
+
+    # For each entry in the asset list, we make an api call to the EPC database to get the EPC data. We'll retrieve the
+    # uprn for the property, as well as a nice address and postcode that we can use. We'll also try and deduce the
+    # likely wall construction, since many of the homes are new builds, based on their newest EPC
+
+    epc_data = []
+    processed_asset_list = []
+    for _, row in tqdm(raw_asset_list.iterrows(), total=len(raw_asset_list)):
+
+        # The postcodes in the file cannot be relied on, so we try each candidate postcode in turn and stop at the
+        # first one that returns an EPC
+        newest_epc = None
+        for postcode in potential_postcodes:
+            searcher = SearchEpc(
+                address1=row.address1, postcode=postcode, auth_token=EPC_AUTH_TOKEN, os_api_key=""
+            )
+            searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is not None:
+                newest_epc = searcher.newest_epc
+                break
+
+        if newest_epc is None:
+            raise LookupError(
+                f"No EPC found for '{row.address1}' in any of the postcodes {potential_postcodes}"
+            )
+
+        if row["Beds"] == "Studio":
+            number_of_rooms = 2
+        else:
+            # Assume one room for communal space, one room for bathroom
+            number_of_rooms = row["Beds"] + 2
+
+        to_append = {
+            **row.to_dict(),
+            "uprn": newest_epc["uprn"],
+            "address": newest_epc["address1"],
+            "postcode": newest_epc["postcode"],
+            # "walls-description": newest_epc["walls-description"],
+            # "roof-description": newest_epc["roof-description"],
+            # "floor-description": newest_epc["floor-description"],
+            # "total-floor-area": newest_epc["total-floor-area"],
+            "full-address": newest_epc["address"],
+            "number-heated-rooms": number_of_rooms,
+            "number-habitable-rooms": number_of_rooms,
+        }
+
+        processed_asset_list.append(to_append)
+        epc_data.append(newest_epc)
+
+    processed_asset_list_df = pd.DataFrame(processed_asset_list)
+
+    epc_data_df = pd.DataFrame(epc_data)
+
+    # Properties in the second portfolio are dropped from the first plan
+    in_second_portfolio = processed_asset_list_df["uprn"].astype(int).isin(second_portfolio_uprns)
+
+    # We store this data
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
+    save_csv_to_s3(
+        dataframe=processed_asset_list_df[~in_second_portfolio],
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "budget": None,
+    }
+    print(body)
+
+    subset = processed_asset_list_df[in_second_portfolio]
+
+    filename2 = f"{USER_ID}/{SECOND_SCENARIO_PORTFOLIO_ID}/test_inputs.csv"
+    save_csv_to_s3(
+        dataframe=subset,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename2
+    )
+
+    # The second portfolio must point at its own inputs file rather than the first portfolio's
+    body = {
+        "portfolio_id": str(SECOND_SCENARIO_PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename2,
+        "budget": None,
+    }
+    print(body)
+
+    # Some basic analysis on the heating, heating controls and hot water systems
+
+    # All of the heating systems are rated very poor, poor or average. When it's average, they are all also
+    # "Room heaters, electric", but the house has "Programmer and appliance thermostats" for the heating controls.
+    # which is more efficient
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+
+    # Heating
+    print(epc_data_df[["mainheat-description", "mainheatcont-description", "mainheat-energy-eff"]].drop_duplicates())
+    #                    mainheat-description              mainheatcont-description mainheat-energy-eff
+    # 0                Room heaters, electric        Programmer and room thermostat           Very Poor
+    # 12               Room heaters, electric  Programmer and appliance thermostats             Average
+    # 20  Electric storage heaters, radiators                  Celect-type controls                Poor
+
+    # Hot water
+    print(epc_data_df[["hotwater-description", "hot-water-energy-eff"]].drop_duplicates())
+    #                    hotwater-description hot-water-energy-eff
+    # 0   Electric immersion, standard tariff            Very Poor
+    # 12         Electric immersion, off-peak              Average
+
+    # We now retrieve EPCS for all of the properties that are in these postcodes very obviously for the velvet mill
+    # We'll use this information to get a sense of the likely wall/roof/floor construction for the properties
+
+    # client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+    #
+    # neighbouring_epcs = []
+    # for pc in potential_postcodes:
+    #     response = client.domestic.search(params={"postcode": pc}, size=1000)
+    #     data = response["rows"]
+    #
+    #     # keep just rows that are clearly for the velvet mill
+    #     data = [x for x in data if "velvet" in x["address1"].lower()]
+    #
+    #     neighbouring_epcs.extend(data)
+    #
+    # neighbouring_epcs_df = pd.DataFrame(neighbouring_epcs)
+    # neighbouring_epcs_df["walls-description"].value_counts()
+    # neighbouring_epcs_df["roof-description"].value_counts()
+    # neighbouring_epcs_df["floor-description"].value_counts()
--- a/etl/customers/urban_splash/slides.py
+++ b/etl/customers/urban_splash/slides.py
@ -0,0 +1,352 @@
+"""
+This script contains the code to generate the data required to populate the slides
+We connect to the database and extract the data for the portfolio needed, so it is recommended to use
+an environment akin to the backend to run this script
+"""
+import pandas as pd
+import numpy as np
+from backend.app.db.connection import db_engine
+from sqlalchemy.orm import sessionmaker
+from etl.customers.slide_utils import (
+    plot_epc_distribution,
+    get_property_details_by_portfolio_id,
+    get_plan_by_portfolio_id,
+    get_properties_with_default_recommendations,
+    create_powerpoint,
+    create_recommendations_summary
+)
+
+# The portfolio holding the main plan, and the portfolio holding the second scenario for the borderline properties
+PORTFOLIO_ID = 66
+SECOND_SCENARIO_PORTFOLIO_ID = 65
+# The EPC rating being targeted, and the SAP score used as the cut-off for it: properties with current_sap_points
+# below SAP_TARGET are treated as needing work
+EPC_TARGET = "C"
+SAP_TARGET = 69
+# Used to build the output paths under etl/customers/
+CUSTOMER_KEY = "urban_splash"
+
+
+def _explode_recommendations(properties_df):
+    """
+    Flattens the per-property "recommendations" lists into a DataFrame with one row per recommendation.
+    Null entries (properties without recommendations) are dropped.
+    """
+    exploded = properties_df["recommendations"].explode().tolist()
+    return pd.DataFrame([r for r in exploded if not pd.isnull(r)])
+
+
+def app():
+    """
+    Builds the technical slides for the Urban Splash portfolios and saves them as a PowerPoint file.
+
+    The data for the main portfolio and for the second scenario portfolio is pulled from the database and
+    summarised into the commentary for three slides. The presentation is written to
+    etl/customers/{CUSTOMER_KEY}/{CUSTOMER_KEY}_tech_slides.pptx
+    """
+    # Connect to database
+    session = sessionmaker(bind=db_engine)()
+
+    ########################################################################
+    # Get the data we need
+    ########################################################################
+
+    # Get the properties for the portfolio
+    properties = get_properties_with_default_recommendations(session, PORTFOLIO_ID)
+    properties_df = pd.DataFrame(properties)
+
+    # We now pull the data for the property details
+    property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
+    property_details_df = pd.DataFrame(property_details)
+    # Merge on uprn
+    property_details_df = property_details_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
+
+    plans = get_plan_by_portfolio_id(session, PORTFOLIO_ID)
+    plans_df = pd.DataFrame(plans)
+
+    # Unnest the recommendations. Each recommendation is a list of dictionaries
+    recommendations_df = _explode_recommendations(properties_df)
+
+    # create_recommendations_summary takes the property details as its third argument. Only the SAP, cost and
+    # savings columns of the summary are used below, so None is passed and the property details merge is skipped
+    recommendations_summary = create_recommendations_summary(
+        recommendations_df, properties_df, property_details_df=None, sap_target=SAP_TARGET
+    )
+
+    # Get the data for the second scenario portfolio
+    properties_second_scenario = get_properties_with_default_recommendations(session, SECOND_SCENARIO_PORTFOLIO_ID)
+    properties_second_scenario_df = pd.DataFrame(properties_second_scenario)
+
+    property_details_second_scenario = get_property_details_by_portfolio_id(session, SECOND_SCENARIO_PORTFOLIO_ID)
+    property_details_second_scenario_df = pd.DataFrame(property_details_second_scenario)
+    # Merge on uprn
+    property_details_second_scenario_df = property_details_second_scenario_df.merge(
+        properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
+
+    plans_second_scenario = get_plan_by_portfolio_id(session, SECOND_SCENARIO_PORTFOLIO_ID)
+    plans_second_scenario_df = pd.DataFrame(plans_second_scenario)
+    # Merge on uprn so we can compare properties across portfolios
+    plans_second_scenario_df = plans_second_scenario_df.merge(
+        properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}), on="property_id"
+    )
+
+    recommendations_second_scenario_df = _explode_recommendations(properties_second_scenario_df)
+
+    recommendations_summary_second_scenario = create_recommendations_summary(
+        recommendations_second_scenario_df,
+        properties_second_scenario_df,
+        property_details_df=None,
+        sap_target=SAP_TARGET
+    )
+
+    # Combine the data for both scenarios
+    full_property_details = pd.concat([property_details_df, property_details_second_scenario_df])
+    full_properties = pd.concat([properties_df, properties_second_scenario_df])
+
+    epc_rating_summary = full_properties.groupby("current_epc_rating").size().reset_index(name="count")
+    epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
+
+    ########################################################################
+    # We pull out the data for the slides
+    ########################################################################
+
+    ############
+    # Slide 1:
+    ############
+    # visual - only the path to the saved image is needed for the slide
+    _, figure_path = plot_epc_distribution(
+        epc_rating_summary, CUSTOMER_KEY, title="", background_color="white", bar_height=0.75, font_size=15
+    )
+
+    # floor area - upper and lower bounds
+
+    # Take just properties that are below EPC C
+    properties_needing_work = full_properties[
+        full_properties["current_sap_points"] < SAP_TARGET
+    ]
+    property_details_needing_work = full_property_details[
+        full_property_details["uprn"].isin(properties_needing_work["uprn"])
+    ]
+
+    min_area, max_area, average_area = (
+        full_property_details["total_floor_area"].min(),
+        full_property_details["total_floor_area"].max(),
+        full_property_details["total_floor_area"].mean()
+    )
+
+    # Annual energy consumption - upper and lower bounds
+    min_energy_consumption, max_energy_consumption, average_consumption = (
+        property_details_needing_work["adjusted_energy_consumption"].min(),
+        property_details_needing_work["adjusted_energy_consumption"].max(),
+        property_details_needing_work["adjusted_energy_consumption"].mean()
+    )
+
+    # Co2 emissions - upper and lower bounds
+    min_co2, max_co2, average_co2 = (
+        property_details_needing_work["co2_emissions"].min(),
+        property_details_needing_work["co2_emissions"].max(),
+        property_details_needing_work["co2_emissions"].mean()
+    )
+
+    # Valuation: upper and lower bounds and average - take positive values in case we have just a sample
+    valuation_df = properties_df[properties_df["current_valuation"] > 0]
+    min_valuation, max_valuation, average_valuation = (
+        valuation_df["current_valuation"].min(),
+        valuation_df["current_valuation"].max(),
+        valuation_df["current_valuation"].median()
+    )
+
+    slide_1_commentary = (
+        f"Floor areas range from {min_area} to {max_area} square meters, with an average of {average_area} square "
+        f"meters. \n"
+        f"Annual energy consumption ranges from {min_energy_consumption} to {max_energy_consumption} kWh, with an "
+        f"average of {average_consumption} kWh. \n"
+        f"CO2 emissions range from {min_co2} to {max_co2} tonnes, with an average of {average_co2} tonnes. \n"
+        f"Valuations range from £{min_valuation} to £{max_valuation}, with an average of £"
+        f"{average_valuation}.\n"
+    )
+
+    ############
+    # Slide 2:
+    ############
+    # What it would take to hit EPC C
+
+    # We calculate the number of units that will make it to an EPC C
+
+    units_hitting_target = recommendations_summary[
+        recommendations_summary["expected_epc_rating"] == EPC_TARGET
+    ]
+
+    n_units_to_target = units_hitting_target.shape[0]
+
+    measures = "Electrical heating system upgrades & heating controls and Hot water system improvements"
+
+    # Costs
+    (
+        expected_cost_per_unit_lower,
+        expected_cost_per_unit_upper,
+        expected_project_cost,
+    ) = (
+        units_hitting_target["total_cost"].min(),
+        units_hitting_target["total_cost"].max(),
+        units_hitting_target["total_cost"].sum()
+    )
+
+    # Per property
+    # Take positive entries just in case we have a sample
+    valuation_impact_df = plans_df[plans_df["property_id"].isin(units_hitting_target["property_id"])]
+    valuation_impact_df = valuation_impact_df[valuation_impact_df["valuation_increase_lower_bound"] > 0]
+    min_valuation_impact, max_valuation_impact, average_valuation_impact = (
+        valuation_impact_df["valuation_increase_lower_bound"].median(),
+        valuation_impact_df["valuation_increase_upper_bound"].median(),
+        valuation_impact_df["valuation_increase_average"].median()
+    )
+
+    # Bill savings per property
+    min_bill_savings, max_bill_savings, average_bill_savings = (
+        units_hitting_target["total_bill_savings"].min(),
+        units_hitting_target["total_bill_savings"].max(),
+        units_hitting_target["total_bill_savings"].mean()
+    )
+
+    # Total CO2 reduction of portfolio
+    min_co2_reduction, max_co2_reduction, average_co2_reduction, total_co2_reduction = (
+        units_hitting_target["total_carbon"].min(),
+        units_hitting_target["total_carbon"].max(),
+        units_hitting_target["total_carbon"].mean(),
+        units_hitting_target["total_carbon"].sum()
+    )
+
+    slide_2_commentary = (
+        f"{n_units_to_target} units expected to achieve EPC {EPC_TARGET} \n"
+        f"Expected cost: {expected_cost_per_unit_lower} - {expected_cost_per_unit_upper}, total project: £"
+        f"{expected_project_cost}\n"
+        f"Measures include: {measures}\n"
+        f"Valuation increase per property: £{min_valuation_impact}-{max_valuation_impact}, average: £"
+        f"{average_valuation_impact}\n"
+        f"Bill savings per property: £{min_bill_savings}-{max_bill_savings}, average: £{average_bill_savings}\n"
+        f"Total CO2 reduction: {min_co2_reduction}-{max_co2_reduction} tonnes, average: {average_co2_reduction} "
+        f"tonnes, total for the {n_units_to_target} properties: {total_co2_reduction} tonnes\n"
+    )
+
+    ############
+    # Slide 3:
+    ############
+
+    units_missed_target = recommendations_summary_second_scenario.copy()
+
+    n_units_missed_target = units_missed_target.shape[0]
+
+    # How close were the properties that missed the target
+    # We calculate the difference between the expected sap points and the lower bound sap points for the target
+    min_difference, max_difference = (
+        np.ceil(units_missed_target["sap_difference"].min()),
+        np.ceil(units_missed_target["sap_difference"].max())
+    )
+
+    second_scenario_measures = ("Electrical heating system upgrades & heating controls, Hot water system improvements "
+                                "and internal wall insulation")
+
+    # Just take all of the units in the second scenario, since they're borderline
+    units_hitting_target_second_scenario = recommendations_summary_second_scenario[
+        recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values)
+    ]
+
+    n_units_hitting_second_scenario = units_hitting_target_second_scenario[
+        units_hitting_target_second_scenario["expected_epc_rating"] == EPC_TARGET
+    ].shape[0]
+
+    # Impact on second scenario
+    # Costs
+    (
+        expected_cost_per_unit_lower_second_scenario,
+        expected_cost_per_unit_upper_second_scenario,
+        expected_project_cost_second_scenario,
+    ) = (
+        recommendations_summary_second_scenario["total_cost"].min(),
+        recommendations_summary_second_scenario["total_cost"].max(),
+        recommendations_summary_second_scenario["total_cost"].sum()
+    )
+
+    valuation_impact_df_second_scenario = plans_second_scenario_df[
+        plans_second_scenario_df["uprn"].isin(units_hitting_target_second_scenario["uprn"])
+    ]
+    valuation_impact_df_second_scenario = valuation_impact_df_second_scenario[
+        valuation_impact_df_second_scenario["valuation_increase_lower_bound"] > 0
+    ]
+    (
+        min_valuation_impact_second_scenario,
+        max_valuation_impact_second_scenario,
+        average_valuation_impact_second_scenario
+    ) = (
+        valuation_impact_df_second_scenario["valuation_increase_lower_bound"].median(),
+        valuation_impact_df_second_scenario["valuation_increase_upper_bound"].median(),
+        valuation_impact_df_second_scenario["valuation_increase_average"].median()
+    )
+
+    # Bill savings per property
+    min_bill_savings_second_scenario, max_bill_savings_second_scenario, average_bill_savings_second_scenario = (
+        units_hitting_target_second_scenario["total_bill_savings"].min(),
+        units_hitting_target_second_scenario["total_bill_savings"].max(),
+        units_hitting_target_second_scenario["total_bill_savings"].mean()
+    )
+
+    # Total CO2 reduction of portfolio
+    (
+        min_co2_reduction_second_scenario,
+        max_co2_reduction_second_scenario,
+        average_co2_reduction_second_scenario,
+        total_co2_reduction_second_scenario
+    ) = (
+        units_hitting_target_second_scenario["total_carbon"].min(),
+        units_hitting_target_second_scenario["total_carbon"].max(),
+        units_hitting_target_second_scenario["total_carbon"].mean(),
+        units_hitting_target_second_scenario["total_carbon"].sum()
+    )
+
+    # Values for the leftovers
+    units_missing_second_scenario = recommendations_summary_second_scenario[
+        (recommendations_summary_second_scenario["expected_epc_rating"] != EPC_TARGET) &
+        (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
+    ]
+
+    # The slide quotes how far the leftover properties miss the target by on average
+    average_difference_second_scenario = np.ceil(units_missing_second_scenario["sap_difference"].mean())
+
+    slide_3_text = (
+        f"{n_units_missed_target} units look like they would miss the EPC {EPC_TARGET} by {min_difference}-"
+        f"{max_difference} points \n"
+        "When on site, an assessor may be able to identify further improvements to bring the properties up to an EPC "
+        f"{EPC_TARGET}.\n"
+        f"We have looked at a more extensive package for these properties, including: {second_scenario_measures}\n"
+        f"Of the {n_units_missed_target} properties, a further {n_units_hitting_second_scenario} are "
+        f"expected to achieve EPC {EPC_TARGET} with these measures.\n"
+        f"Expected cost: {expected_cost_per_unit_lower_second_scenario} - "
+        f"{expected_cost_per_unit_upper_second_scenario}, "
+        f"total project: £"
+        f"{expected_project_cost_second_scenario}\n"
+        f"Valuation increase per property: £{min_valuation_impact_second_scenario}-"
+        f"{max_valuation_impact_second_scenario}, average: £"
+        f"{average_valuation_impact_second_scenario}\n"
+        f"Bill savings per property: £{min_bill_savings_second_scenario}-{max_bill_savings_second_scenario}, "
+        f"average: £{average_bill_savings_second_scenario}\n"
+        f"Total CO2 reduction: {min_co2_reduction_second_scenario}-{max_co2_reduction_second_scenario} tonnes, "
+        f"average: "
+        f"{average_co2_reduction_second_scenario} "
+        f"tonnes, total for the {n_units_hitting_second_scenario} properties: {total_co2_reduction_second_scenario} "
+        f"tonnes\n"
+        f"Even in the second scenario, the remaining {units_missing_second_scenario.shape[0]} properties are expected "
+        f"to miss EPC {EPC_TARGET} by {average_difference_second_scenario} point on average - they should be visited "
+        f"by an assessor"
+    )
+
+    slide_data = {
+        'slide_1': {
+            "title": "EPC Rating Distribution",
+            'image_path': figure_path,  # Pass the path to the saved image
+            "text": slide_1_commentary
+        },
+        "slide_2": {
+            "title": f"Properties that achieve EPC {EPC_TARGET}",
+            "text": slide_2_commentary,
+        },
+        "slide 3": {
+            "title": f"Properties that miss EPC {EPC_TARGET}",
+            "text": slide_3_text
+        }
+    }
+
+    save_location = f"etl/customers/{CUSTOMER_KEY}/{CUSTOMER_KEY}_tech_slides.pptx"
+    create_powerpoint(slide_data, save_location)
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@ -0,0 +1,787 @@
+from recommendations.recommendation_utils import convert_thickness_to_numeric
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
+from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
+
+
+class Eligibility:
+    """
+    Given the epc data about a property, this class holds the logic for determining if the home
+    is eligible for a specific retrofit measure.
+
+    For example, this could be whether the loft has insulation below a standardised threshold, or
+    if it has an empty cavity
+
+    Further to this, this class is responsible for determining if the property is suitable for specific funding
+    schemes
+    """
+
+    loft = None
+    cavity = None
+    solid_wall = None
+    room_roof = None
+    flat_roof = None
+    suspended_floor = None
+    solid_floor = None
+
+    # schemes based on Warmfront now
+    gbis_warmfront = None
+    eco4_warmfront = None
+    # Schemes based on full eligibility
+    gbis = None
+    eco4 = None
+
+    # If the loft has 100mm of insulation or less, we classify the home as needing loft insulation
+    LOFT_INSULATION_THRESHOLD = 100
+    HIGH_LOFT_INSULATION_THRESHOLD = 269
+
+    # Because EPCs have different values for tenure, we need to remap them to a common set of values
+    tenure_remap = {
+        'NO DATA!': "unknown",
+        'Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is no':
+            "unknown",
+        'Owner-occupied': 'Owner-occupied',
+        'Rented (private)': 'Rented (private)',
+        'Rented (social)': 'Rented (social)',
+        'owner-occupied': 'Owner-occupied',
+        'rental (private)': 'Rented (private)',
+        'rental (social)': 'Rented (social)',
+        'unknown': "unknown",
+    }
+
+    def __init__(self, epc, cleaned):
+        """
+        Store the raw EPC record and the cleaned-description lookup, then derive the fabric attributes and the
+        remapped tenure that the rest of the class reads
+        :param epc: Mapping of raw EPC fields for one property. It is read by key: "walls-description",
+                    "roof-description", "floor-description" and "tenure" are required here
+        :param cleaned: Mapping from each fabric description key to a list of pre-cleaned records, each of which
+                        carries an "original_description" entry (see parse_fabric)
+        """
+        self.epc, self.cleaned = epc, cleaned
+
+        # Resolve the fabric elements in a fixed order: walls, then roof, then floor
+        for attribute, description_key in (
+            ("walls", "walls-description"),
+            ("roof", "roof-description"),
+            ("floor", "floor-description"),
+        ):
+            setattr(self, attribute, self.parse_fabric(description_key))
+
+        # Any tenure value that is absent from tenure_remap resolves to None
+        self.tenure = self.tenure_remap.get(self.epc["tenure"])
+
+    def parse_fabric(self, key):
+        """
+        Return the cleaned attributes for one fabric description of the property.
+
+        A pre-cleaned record is preferred: the first entry of self.cleaned[key] whose "original_description"
+        equals the EPC description is returned as-is. Otherwise the description is processed with the matching
+        attribute class and a "clean_description" entry is added to its output
+        :param key: one of "walls-description", "roof-description" or "floor-description"
+        :return: the matching pre-cleaned record, or the freshly processed output
+        :raises ValueError: if no pre-cleaned record matches and the key is not one of the three fabric keys
+        """
+        raw_description = self.epc[key]
+
+        # Prefer the version of the description that has already been cleaned
+        for record in self.cleaned[key]:
+            if record["original_description"] == raw_description:
+                return record
+
+        if "SAP05:" in raw_description:
+            # Placeholder handling: this occurs for very old EPCs, so the description is overwritten on the
+            # epc record itself and effectively skipped
+            self.epc[key] = "(assumed)"
+
+        if key not in ("walls-description", "roof-description", "floor-description"):
+            raise ValueError("Invalid key")
+
+        cleaner_types = {
+            "walls-description": WallAttributes,
+            "roof-description": RoofAttributes,
+            "floor-description": FloorAttributes,
+        }
+        cleaner = cleaner_types[key](self.epc[key])
+
+        attributes = cleaner.process()
+        tidy_description = cleaner.description.replace("(assumed)", "").rstrip().capitalize()
+        attributes["clean_description"] = tidy_description
+
+        return attributes
+
+    def loft_insulation(self, loft_thickness_threshold: int = None):
+        """
+        Decide from the roof attributes whether the property is suitable for loft insulation and store the
+        verdict on self.loft.
+
+        A roof counts as a loft when it is pitched and is not a roof room. A loft whose existing insulation is
+        at or below loft_thickness_threshold is deemed suitable. The stored dictionary has the entries
+        "suitability", "thickness", "reason" and "thickness_classification"
+        :param loft_thickness_threshold: Integer, Optional. Lofts with insulation at or below this thickness are
+                                         deemed suitable. When omitted, LOFT_INSULATION_THRESHOLD is used
+        """
+        if loft_thickness_threshold is None:
+            loft_thickness_threshold = self.LOFT_INSULATION_THRESHOLD
+
+        roof = self.roof
+
+        # Anything that is not a pitched, non-roof-room roof is out of scope for loft insulation
+        if not (roof["is_pitched"] and (not roof["is_roof_room"])):
+            self.loft = {
+                "suitability": False,
+                "thickness": None,
+                "reason": "roof not loft",
+                "thickness_classification": None
+            }
+            return
+
+        # Turn the textual thickness into a number that can be compared against the thresholds
+        thickness_mm = convert_thickness_to_numeric(
+            string_thickness=roof["insulation_thickness"],
+            is_pitched=roof["is_pitched"],
+            is_flat=roof["is_flat"]
+        )
+
+        # The classification bands are fixed and do not depend on loft_thickness_threshold:
+        # 0 - 100mm insulation, 100 - 270mm insulation, 270mm+ insulation
+        if thickness_mm <= 100:
+            band = "0-100mm"
+        elif thickness_mm <= self.HIGH_LOFT_INSULATION_THRESHOLD:
+            band = "100-270mm"
+        else:
+            band = "270mm+"
+
+        if thickness_mm <= loft_thickness_threshold:
+            suitable, reason = True, None
+        else:
+            # Insulation is already thick enough
+            suitable, reason = False, "existing insulation"
+
+        self.loft = {
+            "suitability": suitable,
+            "thickness": thickness_mm,
+            "reason": reason,
+            "thickness_classification": band
+        }
+
+    def cavity_insulation(self):
+        """
+        Decide from the wall attributes whether the property is suitable for cavity wall insulation and store
+        the verdict on self.cavity as a dictionary with "suitability" and "type" entries.
+
+        The outcomes are checked in priority order: existing internal or external wall insulation (not
+        suitable), empty cavity, assumed as-built cavity, partially filled cavity, under-performing cavity and,
+        when nothing else applies, a cavity treated as full (not suitable)
+        :return:
+        """
+        walls = self.walls
+
+        cavity_wall = walls["is_cavity_wall"]
+        unfilled = not walls["is_filled_cavity"]
+        # As built and assumed, with no average or above average insulation recorded
+        as_built_uninsulated = (
+            walls["is_as_built"]
+            and walls["insulation_thickness"] not in ("average", "above average")
+            and walls["is_assumed"]
+        )
+        partially_filled = "partial" in walls["clean_description"].lower()
+        # Potentially under-performing cavities: assumed, as built and recorded with average insulation
+        as_built_average = (
+            walls["is_as_built"] and walls["insulation_thickness"] in ("average",) and walls["is_assumed"]
+        )
+
+        internally_insulated = walls["internal_insulation"]
+        externally_insulated = walls["external_insulation"]
+
+        # Ordered (condition, suitability, type) rules; the first rule whose condition holds wins and the
+        # final rule always matches
+        rules = (
+            (internally_insulated or externally_insulated, False, "internal or external wall insulation"),
+            (cavity_wall and (unfilled and not partially_filled), True, "empty"),
+            (cavity_wall and as_built_uninsulated, True, "as built assumed"),
+            (cavity_wall and partially_filled, True, "partial"),
+            (cavity_wall and as_built_average, True, "underperforming"),
+            (True, False, "full"),
+        )
+        suitable, cavity_type = next(
+            (suitability, label) for condition, suitability, label in rules if condition
+        )
+
+        self.cavity = {
+            "suitability": suitable,
+            "type": cavity_type
+        }
+
+    def solid_wall_insulation(self):
+        """
+        Given the description of the walls, this function determines if the property is suitable for solid wall
+        insulation. The verdict is stored on self.solid_wall as a dictionary with a boolean "suitability" entry.
+
+        A wall is suitable when it is solid brick and is NOT already insulated, where "insulated" means an
+        insulation thickness of "average" or "above average". This mirrors the suspended and solid floor checks
+        :return:
+        """
+
+        is_solid = self.walls["is_solid_brick"]
+        is_insulated = self.walls["insulation_thickness"] in ["average", "above average"]
+
+        # Only an uninsulated solid wall needs the measure. The previous condition (solid AND insulated) was
+        # inverted, which flagged already-insulated walls and skipped the walls that actually need insulating
+        self.solid_wall = {
+            "suitability": bool(is_solid and (not is_insulated)),
+        }
+
+    def room_roof_insulation(self):
+        """
+        Decide whether the property is suitable for room-in-roof insulation and store the verdict on
+        self.room_roof as a dictionary with "suitability" and "thickness" entries.
+
+        Only roof rooms are considered, and a roof room is suitable when its numeric insulation thickness is 0
+        """
+        roof = self.roof
+
+        if roof["is_roof_room"]:
+            thickness = convert_thickness_to_numeric(
+                roof["insulation_thickness"],
+                roof["is_pitched"],
+                roof["is_flat"]
+            )
+            suitable = thickness == 0
+        else:
+            # Not a roof room, so there is no thickness to report
+            thickness, suitable = None, False
+
+        self.room_roof = {
+            "suitability": suitable,
+            "thickness": thickness
+        }
+
+    def flat_roof_insulation(self):
+        """
+        Decide whether the property is suitable for flat roof insulation and store the verdict on
+        self.flat_roof as a dictionary with "suitability" and "thickness" entries.
+
+        Only flat roofs are considered, and a flat roof is suitable when its numeric insulation thickness is
+        100 or less
+        """
+        roof = self.roof
+
+        if roof["is_flat"]:
+            thickness = convert_thickness_to_numeric(
+                roof["insulation_thickness"],
+                roof["is_pitched"],
+                roof["is_flat"]
+            )
+            suitable = thickness <= 100
+        else:
+            # Not a flat roof, so there is no thickness to report
+            thickness, suitable = None, False
+
+        self.flat_roof = {
+            "suitability": suitable,
+            "thickness": thickness
+        }
+
+    def suspended_floor_insulation(self):
+        """
+        Decide whether the property is suitable for suspended floor insulation and store the verdict on
+        self.suspended_floor as a dictionary with a "suitability" entry.
+
+        A floor flagged with a truthy "no_data" entry is never suitable. Otherwise the floor is suitable when
+        it is suspended and its insulation thickness is neither "average" nor "above average"
+        """
+        floor = self.floor
+
+        # Without floor data there is nothing to base a recommendation on
+        if "no_data" in floor.keys() and floor["no_data"]:
+            self.suspended_floor = {
+                "suitability": False,
+            }
+            return
+
+        suspended = floor["is_suspended"]
+        already_insulated = floor["insulation_thickness"] in ("average", "above average")
+
+        self.suspended_floor = {
+            "suitability": suspended and (not already_insulated),
+        }
+
+    def solid_floor_insulation(self):
+        """
+        Decide whether the property is suitable for solid floor insulation and store the verdict on
+        self.solid_floor as a dictionary with a "suitability" entry.
+
+        A floor flagged with a truthy "no_data" entry is never suitable. Otherwise the floor is suitable when
+        it is solid and its insulation thickness is neither "average" nor "above average"
+        """
+        floor = self.floor
+
+        # Without floor data there is nothing to base a recommendation on
+        if "no_data" in floor.keys() and floor["no_data"]:
+            self.solid_floor = {
+                "suitability": False,
+            }
+            return
+
+        solid = floor["is_solid"]
+        already_insulated = floor["insulation_thickness"] in ("average", "above average")
+
+        self.solid_floor = {
+            "suitability": solid and (not already_insulated),
+        }
+
+    def check_gbis_warmfront(self):
+        """
+        Check eligibility for the Great British Insulation Scheme (GBIS), restricted to what the Warmfront team
+        can deliver, and store the verdict on self.gbis_warmfront as a dictionary with "eligible", "strict" and
+        "message" entries.
+
+        The scheme criteria are published here:
+        https://www.ofgem.gov.uk/environmental-and-social-schemes/great-british-insulation-scheme/homeowners-and-tenants
+
+        At a high level, the scheme requires that:
+        - The home is within council tax bands A-D in England, A-E in Scotland, A-E in Wales
+        - The home has an EPC rating of D or below
+
+        The council tax band is not checked for the moment. There is a government website which allows you to
+        search for properties (https://www.gov.uk/council-tax-bands) and the data is possibly contained on the
+        council tax valuation list, but it remains to be seen (it seems unlikely) whether that data is openly
+        accessible:
+        https://www.gov.uk/government/statistics/quality-assurance-of-administrative-data-in-the-uk-house-price-index
+        /valuation-office-agency-council-tax-valuation-lists
+
+        Because this method is tailored to the Warmfront team and their delivery capabilities (both practically
+        and commercially), it checks only:
+        1) Whether the property is an EPC D or below (a current SAP score below 69)
+        2) Whether the property is suitable for cavity wall insulation
+
+        GBIS itself applies to many more insulation measures, which are listed in the Ofgem document. GBIS has
+        no minimum upgrade requirement, so the post retrofit SAP score does not need to be simulated with the
+        machine learning model
+        """
+
+        # Refresh the cavity wall verdict before reading it
+        self.cavity_insulation()
+
+        sap_score = int(self.epc["current-energy-efficiency"])
+        below_band_c = sap_score < 69
+
+        if (self.cavity["type"] == "empty") and below_band_c:
+            # An empty cavity is the ideal case
+            eligible, strict, message = True, True, "Perfect suitability"
+        elif self.cavity["suitability"] and below_band_c:
+            # Any other suitable cavity type
+            eligible, strict, message = True, True, "Near perfect suitability"
+        else:
+            eligible, strict, message = False, False, "All conditions fail"
+
+        self.gbis_warmfront = {
+            "eligible": eligible,
+            "strict": strict,
+            "message": message,
+        }
+
+    def check_eco4_warmfront(self):
+        """
+        Check whether the property looks eligible for ECO4 funding, restricted to the measures the Warmfront
+        team can deliver (cavity wall and loft insulation), and store the verdict on self.eco4_warmfront.
+
+        Ofgem's V1.1 ECO4 guidance document is used for the conditions under which a property is eligible:
+        https://www.ofgem.gov.uk/sites/default/files/2023-02/ECO4%20Delivery%20Guidance%20v1.1%20%281%29.pdf
+
+        The conditions (to be reviewed) to be eligible for retrofit under ECO4 are the following:
+        1) The property is a social home (this is assumed prior to this function, as this code will often be
+           run on property lists provided by a HA)
+        2) The property is an EPC E or below (a current SAP score below 55)
+        3) The property has an unfilled cavity and an uninsulated loft
+        4) After retrofit, the property will hit an EPC C
+
+        Only conditions 2 and 3 are evaluated here; the post retrofit score is not checked by this method.
+        Note: the criteria will likely be adjusted depending on the properties that can be served right now.
+
+        The stored dictionary has the entries "eligible", "strict", "message", "cavity_type" and "loft_type".
+        As a side effect, when the roof is not a loft, self.loft["thickness"] is overwritten with the
+        placeholder 999
+        :return:
+        :raises ValueError: if no combination of the cavity, loft and SAP conditions matches
+        """
+
+        sap_score = int(self.epc["current-energy-efficiency"])
+        self.cavity_insulation()
+        self.loft_insulation()
+
+        # A roof that is not a loft gets a placeholder thickness so that it never counts as a thin loft
+        if self.loft["reason"] == "roof not loft":
+            self.loft["thickness"] = 999
+
+        cavity_ok = self.cavity["suitability"]
+        cavity_empty = self.cavity["type"] == "empty"
+        thickness = self.loft["thickness"]
+
+        thin_loft = thickness <= 100
+        borderline_loft = thickness <= 150
+        over_100 = thickness > 100
+        over_150 = thickness > 150
+        low_sap = sap_score < 55
+        high_sap = sap_score >= 55
+
+        # Ordered (condition, eligible, strict, message) rules; the first rule whose condition holds wins
+        rules = (
+            # Case 1 - no conditions met
+            ((not cavity_ok) and over_100 and high_sap, False, False, "All conditions fail"),
+            # Case 2 - perfect match
+            (cavity_empty and thin_loft and low_sap, True, True, "Perfect suitability"),
+            # Case 2.5 - near perfect match, but this would not be recommended using the model
+            (cavity_ok and thin_loft and low_sap, True, True, "Near perfect suitability"),
+            # Case 3 - cavity is suitable, loft is within 150mm, sap is met
+            (cavity_ok and borderline_loft and low_sap, True, False, "Meets cavity, loft borderline, meets sap"),
+            # Case 3 - cavity is suitable, loft is not, sap is met
+            (cavity_ok and over_150 and low_sap, True, False, "Meets cavity and sap"),
+            # Case 4 - cavity is not suitable, loft is, sap is met - treated as not eligible
+            ((not cavity_ok) and thin_loft and low_sap, False, False, "failed fabric check"),
+            # Case 5 - cavity and loft suitable, sap too high
+            (cavity_ok and borderline_loft and high_sap, True, False, "Meets fabric, fails SAP check"),
+            # Case 6 - meets just cavity
+            (cavity_ok and over_100 and high_sap, True, False, "Meets just cavity"),
+            # Case 7 - fails cavity and loft but meets sap
+            ((not cavity_ok) and over_100 and low_sap, False, False, "Fails cavity and loft, meets SAP"),
+            # Case 8 - fails cavity, meets loft, fails sap
+            ((not cavity_ok) and thin_loft and high_sap, False, False, "Fails cavity, meets loft, fails SAP"),
+        )
+
+        for condition, eligible, strict, message in rules:
+            if condition:
+                self.eco4_warmfront = {
+                    "eligible": eligible,
+                    "strict": strict,
+                    "message": message,
+                    "cavity_type": self.cavity["type"],
+                    "loft_type": self.loft["thickness_classification"]
+                }
+                return
+
+        raise ValueError("Implement me")
+
+    def check_gbis(self):
+        """
+        Check eligibility for the Great British Insulation Scheme (GBIS) across ALL of the fabric measures
+        this class can assess, and store the verdict on self.gbis as a dictionary with "eligible" and "message"
+        entries.
+
+        The scheme criteria can be found here:
+        https://www.ofgem.gov.uk/environmental-and-social-schemes/great-british-insulation-scheme/homeowners-and-tenants
+
+        Full delivery guidance can be downloaded here:
+        https://www.ofgem.gov.uk/sites/default/files/2023-08/Great%20British%20Insulation%20Scheme%20Delivery
+        %20Guidance%20V101693416860968.pdf
+
+        For social housing, the scheme criteria are:
+        - EPC D: valid for innovation measures only, but not a heating control measure
+        - EPC E or below: valid for all eligible insulation measures
+        - In both cases the property must be rented at below the market rate. All eligible social housing is
+          treated based on the low income group, therefore the tenant must be in receipt of one of the eligible
+          benefits
+
+        From the GBIS guidance document (paragraphs 3.101 and 3.102): social housing is only eligible where it
+        is let below the market rate, confirmed by a declaration signed by the social landlord (included in the
+        Eligibility and Pre-Retrofit Declaration form, which suppliers must retain for audit). Void premises
+        qualify where they have previously been, and will be, let below the market rate. Social housing let at
+        or above the market rate can be treated as private domestic premises where the occupant meets the
+        eligibility requirements
+
+        The measures available under GBIS include cavity wall (including party wall), loft, solid wall, pitched
+        roof, flat roof, under-floor, solid floor, park home and room-in-roof insulation. This method evaluates
+        the cavity, loft, solid wall, room-in-roof, flat roof, suspended floor and solid floor checks.
+
+        How the verdict is reached here:
+        - Rented (social), SAP 55-68: eligible when the potential SAP score is above 68 (placeholder method)
+        - Rented (social), SAP 54 or below: eligible when any fabric measure is needed
+        - Rented (social), SAP above 68: not eligible
+        - Rented (private): eligible when the SAP score is 68 or below and any fabric measure is needed
+        - Owner-occupied: out of scope
+        - Unknown or unmapped tenure: eligible when any fabric measure is needed
+
+        :return:
+        :raises ValueError: if the tenure is none of the values above
+        """
+
+        # Refresh every fabric verdict before combining them
+        for assess_measure in (
+            self.cavity_insulation,
+            self.loft_insulation,
+            self.solid_wall_insulation,
+            self.room_roof_insulation,
+            self.flat_roof_insulation,
+            self.suspended_floor_insulation,
+            self.solid_floor_insulation,
+        ):
+            assess_measure()
+
+        sap_score = int(self.epc["current-energy-efficiency"])
+        band_e_or_below = sap_score <= 54
+        band_d_or_below = sap_score <= 68
+
+        needs_measure = (
+            self.cavity["suitability"] or
+            self.loft["suitability"] or
+            self.solid_wall["suitability"] or
+            self.room_roof["suitability"] or
+            self.flat_roof["suitability"] or
+            self.suspended_floor["suitability"] or
+            self.solid_floor["suitability"]
+        )
+
+        tenure = self.tenure
+
+        if tenure == "Rented (social)":
+            if band_e_or_below:
+                eligible, message = needs_measure, "eligible under fabric measure"
+            elif band_d_or_below:
+                # this is a placeholder methodology
+                eligible = int(self.epc["potential-energy-efficiency"]) > 68
+                message = "contingent on innovation measure delivery"
+            else:
+                eligible, message = False, "not eligible"
+        elif tenure == "Rented (private)":
+            eligible, message = band_d_or_below and needs_measure, "eligible under fabric measure"
+        elif tenure == "Owner-occupied":
+            eligible, message = False, "Out-of-scope"
+        elif (tenure is None) or tenure == "unknown":
+            eligible, message = needs_measure, "unknown tenure"
+        else:
+            raise ValueError("Implement me other tenure types")
+
+        self.gbis = {
+            "eligible": eligible,
+            "message": message
+        }
+
+    def check_eco4(self):
+        """
+        Assess eligibility for ECO4 funding across all of the fabric measures this class can check, and store
+        the verdict on self.eco4 as a dictionary with "eligible" and "message" entries.
+
+        Because ECO4 supports nearly all measures, if we have commercial agreements in place then a large number
+        of homes would be eligible for eco funding, if identified.
+
+        These are the eligibility criteria we consider for this process:
+        Privately rented, Help to heat group
+        - Sap E-G
+        - Must receive one of solid wall insulation, first time central heating or district heating control
+        - The property must already have cavity walls and roof insulated
+
+        Social Housing, SAP D
+        - Innovation measures and insulation measures to meet the minimum insulation requirement
+        - Improvement to at least band C
+        - Fabric measures
+        - If receiving any heating measures, must have at least one insulation measure first
+
+        Social Housing, SAP E-G
+        - Insulation measures, first time central heating, renewable heating, district heating connection,
+        innovation measures
+        - Improvement to D (F & G properties) or C (E properties)
+        - If receiving any heating measure, must already have cavity and roof insulation
+
+        Privately rented, ECO4 Flex route 1, 2, 3, 4
+        - Must have SAP E-G
+        - Most measures eligible, but must receive one of solid wall insulation, first time central heating,
+        renewable heating and district heating control
+        - Improvement to D (F & G properties) or C (E properties)
+        - All homes receiving heating measures must first have insulated cavity/roof
+
+
+        The flex routes are given here:
+        https://so-eco.co.uk/what-is-eco4-flex/#:~:text=One%20way%20to%20gain%20ECO4,
+        including%20elderly%20residents%20and%20lodgers.
+
+        Any tenure other than "Rented (social)" or "Rented (private)" is reported as out of scope.
+
+        :return:
+        :raises ValueError: if the social housing branches are internally inconsistent (sanity check)
+        """
+
+        self.cavity_insulation()
+        self.loft_insulation()
+        self.solid_wall_insulation()
+        self.room_roof_insulation()
+        self.flat_roof_insulation()
+        self.suspended_floor_insulation()
+        self.solid_floor_insulation()
+
+        current_sap = int(self.epc["current-energy-efficiency"])
+        # SAP 54 or below is band E-G; SAP 68 or below is band D-G
+        is_below_e = current_sap <= 54
+        is_below_c = current_sap <= 68
+        sap_potential = int(self.epc["potential-energy-efficiency"])
+
+        # A main heating description that does not mention a boiler is treated as a first time central
+        # heating candidate
+        first_time_central_heating = "boiler" not in self.epc["mainheat-description"].lower()
+
+        needs_fabric_measure = (
+            self.cavity["suitability"] or
+            self.loft["suitability"] or
+            self.solid_wall["suitability"] or
+            self.room_roof["suitability"] or
+            self.flat_roof["suitability"] or
+            self.suspended_floor["suitability"] or
+            self.solid_floor["suitability"]
+        )
+
+        # The EPC's own potential score is used as the indicator of the achievable post retrofit band
+        if current_sap <= 38 and sap_potential >= 55:
+            # sap needs to get to at least a D
+            expected_to_meet_upgrades = True
+        elif current_sap <= 68 and sap_potential >= 69:
+            # sap needs to get to at least a C
+            expected_to_meet_upgrades = True
+        else:
+            expected_to_meet_upgrades = False
+
+        if self.tenure == "Rented (social)":
+            if is_below_c and (not is_below_e) and expected_to_meet_upgrades:
+                # If the property is a D, then it's eligible under innovation measures but requires improvement to a
+                # band C
+                self.eco4 = {
+                    "eligible": True,
+                    "message": "eligible under innovation measure and improvement to band C"
+                }
+            elif is_below_e and expected_to_meet_upgrades:
+                # If the property is an E or below, then it's eligible under fabric measures or heating/innovation
+                # measures
+
+                message = "eligible under fabric measures, with sufficient post retrofit sap improvement" if (
+                    needs_fabric_measure) else (
+                    "eligible under heating and innovation measures, with sufficient post retrofit sap improvement"
+                )
+
+                self.eco4 = {"eligible": True, "message": message}
+            else:
+                # Sanity check: a property at SAP 68 or below that is expected to meet the upgrade requirement
+                # must have been caught by one of the two branches above
+                if (current_sap <= 68) and expected_to_meet_upgrades:
+                    raise ValueError("something is wrong")
+                # This branch also covers properties whose potential score does not reach the required band
+                self.eco4 = {
+                    "eligible": False,
+                    "message": "not eligible, above EPC C"
+                }
+
+            return
+
+        if self.tenure == 'Rented (private)':
+            # For private homes, the property needs to be an E or below
+
+            # For private homes, the cavity must be filled and the roof insulated
+            cavity_filled = not self.cavity["suitability"]
+            roof_insulated = (not self.loft["suitability"]) and (not self.room_roof["suitability"]) and (
+                not self.flat_roof["suitability"])
+
+            if not (is_below_e and cavity_filled and roof_insulated and expected_to_meet_upgrades):
+                self.eco4 = {
+                    "eligible": False,
+                    "message": "not eligible at this time, EPC too high"
+                }
+                # Return here so this verdict is not overwritten by the out-of-scope fallback below
+                return
+
+            if self.solid_wall["suitability"]:
+                self.eco4 = {
+                    "eligible": True,
+                    "message": "eligible under solid wall insulation, conditional on post retrofit sap and help "
+                               "to heat/ECO flex route"
+                }
+            elif first_time_central_heating:
+                self.eco4 = {
+                    "eligible": True,
+                    "message": "eligible under first time central heating, conditional on post retrofit sap and "
+                               "help to heat/ECO flex route"
+                }
+            else:
+                self.eco4 = {
+                    "eligible": False,
+                    "message": "not eligible at this time"
+                }
+
+            return
+
+        # Every other tenure (owner-occupied, unknown, unmapped) is out of scope
+        self.eco4 = {
+            "eligible": False,
+            "message": "Out of scope"
+        }
--- a/etl/eligibility/README.md
+++ b/etl/eligibility/README.md
@ -0,0 +1,6 @@
+# Eligibility
+
+This codebase is responsible for determining whether properties look like they would be
+eligible for retrofit funding schemes. To do this, we use our SAP ML model to score
+what the property would look like after a retrofit. We then compare this to the eligibility
+criteria of various schemes to determine whether the property looks likely to be eligible for funding.
--- a/etl/eligibility/init.py
+++ b/etl/eligibility/init.py
--- a/etl/eligibility/ha_15_32/HA
+++ b/etl/eligibility/ha_15_32/HA
@ -0,0 +1,664 @@
+Housing Association,No.,Address,Postcode
+HA15,2,2 Lander Road,HP19 9TT
+HA15,4,4 Lander Road,HP19 9TT
+HA15,5,5 Lander Road,HP19 9TT
+HA15,12,12 Lander Road,HP19 9TT
+HA15,14,14 Lander Road,HP19 9TT
+HA15,18,18 Lander Road,HP19 9TT
+HA15,22,22 Lander Road,HP19 9TT
+HA15,1,1 Eeles Close,HP19 9TU
+HA15,2,2 Eeles Close,HP19 9TU
+HA15,3,3 Eeles Close,HP19 9TU
+HA15,12,12 Eeles Close,HP19 9TU
+HA15,15,15 Eeles Close,HP19 9TU
+HA15,2,2 Dicks Way,HP19 9UA
+HA15,4,4 Dicks Way,HP19 9UA
+HA15,5,5 Dicks Way,HP19 9UA
+HA15,6,6 Dicks Way,HP19 9UA
+HA15,8,8 Dicks Way,HP19 9UA
+HA15,9,9 Dicks Way,HP19 9UA
+HA15,14,14 Dicks Way,HP19 9UA
+HA15,15,15 Dicks Way,HP19 9UA
+HA15,17,17 Dicks Way,HP19 9UA
+HA15,20,20 Dicks Way,HP19 9UA
+HA15,26,26 Dicks Way,HP19 9UA
+HA15,28,28 Dicks Way,HP19 9UA
+HA15,4,4 Fletcher Close,HP19 9UB
+HA15,5,5 Fletcher Close,HP19 9UB
+HA15,24,24 Fletcher Close,HP19 9UB
+HA15,25,25 Fletcher Close,HP19 9UB
+HA15,27,27 Fletcher Close,HP19 9UB
+HA15,28,28 Fletcher Close,HP19 9UB
+HA15,29,29 Fletcher Close,HP19 9UB
+HA15,31,31 Fletcher Close,HP19 9UB
+HA15,32,32 Fletcher Close,HP19 9UB
+HA15,33,33 Fletcher Close,HP19 9UB
+HA15,34,"34 Fletcher Close,Aylesbury,Bucks",HP19 9UB
+HA15,1,1 Grimmer Close,HP19 9UD
+HA15,11,11 Grimmer Close,HP19 9UD
+HA15,14,14 Grimmer Close,HP19 9UD
+HA15,15,15 Grimmer Close,HP19 9UD
+HA15,17,17 Grimmer Close,HP19 9UD
+HA15,18,18 Grimmer Close,HP19 9UD
+HA15,21,21 Grimmer Close,HP19 9UD
+HA15,23,23 Grimmer Close,HP19 9UD
+HA15,24,24 Grimmer Close,HP19 9UD
+HA15,28,28 Grimmer Close,HP19 9UD
+HA15,30,30 Grimmer Close,HP19 9UD
+HA15,1,1 Vincent Road,HP19 9UN
+HA15,6,6 Vincent Road,HP19 9UN
+HA15,10,10 Vincent Road,HP19 9UN
+HA15,12,12 Vincent Road,HP19 9UN
+HA15,13,13 Vincent Road,HP19 9UN
+HA15,16,16 Vincent Road,HP19 9UN
+HA15,21,21 Vincent Road,HP19 9UN
+HA15,24,24 Vincent Road,HP19 9UN
+HA15,26,26 Vincent Road,HP19 9UN
+HA15,27,27 Vincent Road,HP19 9UN
+HA15,32,32 Vincent Road,HP19 9UN
+HA15,1,1 Reading Close,HP19 9UW
+HA15,2,2 Reading Close,HP19 9UW
+HA15,3,3 Reading Close,HP19 9UW
+HA15,4,4 Reading Close,HP19 9UW
+HA15,5,5 Reading Close,HP19 9UW
+HA15,6,6 Reading Close,HP19 9UW
+HA15,7,7 Reading Close,HP19 9UW
+HA15,9,9 Reading Close,HP19 9UW
+HA15,10,10 Reading Close,HP19 9UW
+HA15,6,6 Mary Mac Manus Drive,MK18 1UN
+HA15,8,8 Mary Mac Manus Drive,MK18 1UN
+HA15,10,10 Mary Mac Manus Drive,MK18 1UN
+HA15,2,"2 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,7,"7 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,9,"9 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,11,"11 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,12,"12 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,16,"16 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,17,"17 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,26,"26 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,38,"38 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,41,"41 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
+HA15,25,"25 New Road Weston Turville, Aylesbury",HP22 5RA
+HA15,27,"27 New Road Weston Turville, Aylesbury",HP22 5RA
+HA15,29,"29 New Road Weston Turville, Aylesbury",HP22 5RA
+HA15,31,"31 New Road Weston Turville, Aylesbury",HP22 5RA
+HA15,37,"37 New Road Weston Turville, Aylesbury",HP22 5RA
+HA15,39,"39 New Road Weston Turville, Aylesbury",HP22 5RA
+HA15,5,"5 Walton Place Weston Turville, Aylesbury",HP22 5RB
+HA15,9,"9 Walton Place Weston Turville, Aylesbury",HP22 5RB
+HA15,18,"18 Walton Place Weston Turville, Aylesbury",HP22 5RB
+HA15,21,"21 Walton Place Weston Turville, Aylesbury",HP22 5RD
+HA15,36,"36 Walton Place Weston Turville, Aylesbury",HP22 5RD
+HA15,42,"42 Walton Place Weston Turville, Aylesbury",HP22 5RD
+HA15,46,"46 Walton Place Weston Turville, Aylesbury",HP22 5RD
+HA15,76,"76 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
+HA15,78,"78 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
+HA15,82,"82 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
+HA15,84,"84 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
+HA15,86,"86 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
+HA15,88,"88 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
+HA15,64,"64 Halton Lane Wendover, Aylesbury",HP22 6AZ
+HA15,66,"66 Halton Lane Wendover, Aylesbury",HP22 6AZ
+HA15,68,"68 Halton Lane Wendover, Aylesbury",HP22 6AZ
+HA15,70,"70 Halton Lane Wendover, Aylesbury",HP22 6AZ
+HA15,8,"8 South Street Wendover, Aylesbury",HP22 6EF
+HA15,2,"2 Barlow Road Wendover, Aylesbury",HP22 6HP
+HA15,4,"4 Barlow Road Wendover, Aylesbury",HP22 6HP
+HA15,14,"14 Barlow Road Wendover, Aylesbury",HP22 6HP
+HA15,15,"15 Barlow Road Wendover, Aylesbury",HP22 6HP
+HA15,16,"16 Barlow Road Wendover, Aylesbury",HP22 6HP
+HA15,28,"28 Barlow Road Wendover, Aylesbury",HP22 6HP
+HA15,1,"1 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,5,"5 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,7,"7 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,8,"8 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,9,"9 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,13,"13 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,16,"16 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,20,"20 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,24,"24 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,26,"26 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,28,"28 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,38,"38 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,44,"44 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,50,"50 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
+HA15,15,"15 Hampden Road Wendover, Aylesbury",HP22 6HU
+HA15,18,"18 Hampden Road Wendover, Aylesbury",HP22 6HU
+HA15,22,"22 Hampden Road Wendover, Aylesbury",HP22 6HU
+HA15,26,"26 Hampden Road Wendover, Aylesbury",HP22 6HU
+HA15,28,"28 Hampden Road Wendover, Aylesbury",HP22 6HU
+HA15,25,"25 Hampden Road Wendover, Aylesbury",HP22 6HX
+HA15,27,"27 Hampden Road Wendover, Aylesbury",HP22 6HX
+HA15,31,"31 Hampden Road Wendover, Aylesbury",HP22 6HX
+HA15,34,"34 Hampden Road Wendover, Aylesbury",HP22 6HX
+HA15,36,"36 Hampden Road Wendover, Aylesbury",HP22 6HX
+HA15,38,"38 Hampden Road Wendover, Aylesbury",HP22 6HX
+HA15,5,"5 Gainsborough Road, Aylesbury",HP21 9AZ
+HA15,1,"1 Dart Close, Aylesbury",HP21 9NP
+HA15,1,"1 Wingrave Road Aston Abbotts, Aylesbury",HP22 4LT
+HA15,3,"3 Wingrave Road Aston Abbotts, Aylesbury",HP22 4LT
+HA15,5,"5 Wingrave Road Aston Abbotts, Aylesbury",HP22 4LT
+HA15,82,"82 Winslow Road Wingrave, Aylesbury",HP22 4QB
+HA15,84,"84 Winslow Road Wingrave, Aylesbury",HP22 4QB
+HA15,106,"106 Winslow Road Wingrave, Aylesbury",HP22 4QB
+HA15,125,"125 Winslow Road Wingrave, Aylesbury",HP22 4QB
+HA15,19,"19 Abbotts Way Wingrave, Aylesbury",HP22 4QF
+HA15,37,"37 Abbotts Way Wingrave, Aylesbury",HP22 4QF
+HA15,41,"41 Abbotts Way Wingrave, Aylesbury",HP22 4QF
+HA15,43,"43 Abbotts Way Wingrave, Aylesbury",HP22 4QF
+HA15,2,"2 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,5,"5 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,10,"10 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,12,"12 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,19,"19 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,21,"21 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,22,"22 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,31,"31 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,32,"32 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,33,"33 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,34,"34 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,35,"35 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,37,"37 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,38,"38 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,40,"40 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,42,"42 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
+HA15,23,"23 Great Lane Bierton, Aylesbury",HP22 5DE
+HA15,25,"25 Great Lane Bierton, Aylesbury",HP22 5DE
+HA15,35,"35 Great Lane Bierton, Aylesbury",HP22 5DE
+HA15,37,"37 Great Lane Bierton, Aylesbury",HP22 5DE
+HA15,61,"61 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
+HA15,65,"65 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
+HA15,67,"67 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
+HA15,69,"69 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
+HA15,28,"28a Tring Road Wendover, Aylesbury",HP22 6NT
+HA15,38,"38a Tring Road Wendover, Aylesbury",HP22 6NT
+HA15,14,"14 Tring Road Wendover, Aylesbury",HP22 6NT
+HA15,34,"34 Tring Road Wendover, Aylesbury",HP22 6NT
+HA15,36,"36 Tring Road Wendover, Aylesbury",HP22 6NT
+HA15,64,"64 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,68,"68 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,70,"70 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,74,"74 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,76,"76 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,78,"78 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,80,"80 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,90,"90 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,92,"92 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,100,"100 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,104,"104 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,106,"106 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,108,"108 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,114,"114 Tring Road Wendover, Aylesbury",HP22 6NX
+HA15,38,"38 The Beeches Wendover, Aylesbury",HP22 6PB
+HA15,49,"49 The Beeches Wendover, Aylesbury",HP22 6PB
+HA15,54,"54 The Beeches Wendover, Aylesbury",HP22 6PB
+HA15,64,"64 The Beeches Wendover, Aylesbury",HP22 6PB
+HA15,1,"1 Church End Edlesborough, Dunstable",LU6 2EP
+HA15,2,"2 Church End Edlesborough, Dunstable",LU6 2EP
+HA15,5,"5 Church End Edlesborough, Dunstable",LU6 2EP
+HA15,6,"6 Church End Edlesborough, Dunstable",LU6 2EP
+HA15,7,"7 Church End Edlesborough, Dunstable",LU6 2EP
+HA15,9,"9 Church End Edlesborough, Dunstable",LU6 2EP
+HA15,125,"125 High Street Edlesborough, Dunstable",LU6 2ER
+HA15,6,"6 Dove Street Stewkley, Leighton Buzzard",LU7 0HT
+HA15,14,"14 Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
+HA15,32,"32 Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
+HA15,38,"38a Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
+HA15,38,"38b Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
+HA15,75,"75 High Street Cheddington, Leighton Buzzard",LU7 0RG
+HA15,12,"12 New Street Cheddington, Leighton Buzzard",LU7 0RL
+HA15,14,"14 New Street Cheddington, Leighton Buzzard",LU7 0RL
+HA15,16,"16 New Street Cheddington, Leighton Buzzard",LU7 0RL
+HA15,2,"2 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,4,"4 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,10,"10 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,11,"11 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,17,"17 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,19,"19 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,20,"20 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,23,"23 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,25,"25 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,26,"26 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,28,"28 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,31,"31 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,33,"33 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,36,"36 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,40,"40 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
+HA15,4,"4 Barkham Close Cheddington, Leighton Buzzard",LU7 0RT
+HA15,4,"4 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
+HA15,7,"7 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
+HA15,8,"8 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
+HA15,10,"10 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
+HA15,11,"11 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
+HA15,61,"61 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,69,"69 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,71,"71 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,75,"75 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,85,"85 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,87,"87 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,89,"89 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,95,"95 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,101,"101 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,103,"103 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,125,"125 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,129,"129 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,133,"133 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,141,"141 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,151,"151 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
+HA15,48,"48 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
+HA15,52,"52 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
+HA15,54,"54 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
+HA15,58,"58 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
+HA15,1,"1 Maud Janes Close Ivinghoe, Leighton Buzzard",LU7 9ED
+HA15,3,"3 Maud Janes Close Ivinghoe, Leighton Buzzard",LU7 9ED
+HA15,12,"12 Maud Janes Close Ivinghoe, Leighton Buzzard",LU7 9ED
+HA15,26,"26 Ladysmith Road Ivinghoe, Leighton Buzzard",LU7 9EE
+HA15,24,"24 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
+HA15,26,"26 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
+HA15,28,"28 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
+HA15,30,"30 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
+HA15,32,"32 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
+HA15,3,"3 Stonebridge Road, Aylesbury",HP19 9LX
+HA15,102,"102 Coventon Road, Aylesbury",HP19 9ND
+HA15,83,"83 Priory Crescent, Aylesbury",HP19 9NY
+HA15,103,"103 Priory Crescent, Aylesbury",HP19 9NY
+HA15,83,"83 Weedon Road, Aylesbury",HP19 9PA
+HA15,7,"7 Haines Close, Aylesbury",HP19 9TS
+HA15,8,"8 Haines Close, Aylesbury",HP19 9TS
+HA15,9,"9 Haines Close, Aylesbury",HP19 9TS
+HA15,13,"13 Haines Close, Aylesbury",HP19 9TS
+HA15,22,"22 Haines Close, Aylesbury",HP19 9TS
+HA15,39,"39 Haines Close, Aylesbury",HP19 9TS
+HA15,45,"45 Haines Close, Aylesbury",HP19 9TS
+HA15,27,"27 Oakfield Road, Aylesbury",HP20 1LH
+HA15,11,"11 Wingate Walk, Aylesbury",HP20 1LN
+HA15,9,"9 Stanhope Road, Aylesbury",HP20 1LP
+HA15,28,"28 Stanhope Road, Aylesbury",HP20 1LR
+HA15,12,"12 Cleveland Road, Aylesbury",HP20 2AZ
+HA15,20,"20 Cleveland Road, Aylesbury",HP20 2AZ
+HA15,22,"22 Cleveland Road, Aylesbury",HP20 2AZ
+HA15,7,"7 Bryanston Avenue, Aylesbury",HP20 2BA
+HA15,17,"17 Bryanston Avenue, Aylesbury",HP20 2BA
+HA15,36,"36 Bryanston Avenue, Aylesbury",HP20 2BA
+HA15,38,"38 Bryanston Avenue, Aylesbury",HP20 2BA
+HA15,6,"6 Matlock Road, Aylesbury",HP20 2BE
+HA15,9,"9 Lisburn Path, Aylesbury",HP20 2BQ
+HA15,15,"15 Lisburn Path, Aylesbury",HP20 2BQ
+HA15,3,"3 Lansdowne Road, Aylesbury",HP20 2DJ
+HA15,15,"15 Lansdowne Road, Aylesbury",HP20 2DJ
+HA15,4,"4 Caversham Green, Aylesbury",HP20 2DL
+HA15,1,"1 Davies Close, Aylesbury",HP20 2SH
+HA15,62,"62 Stoke Road, Aylesbury",HP21 8BX
+HA15,64,"64 Stoke Road, Aylesbury",HP21 8BX
+HA15,78,"78 Stoke Road, Aylesbury",HP21 8BX
+HA15,4,"4 Court Close, Aylesbury",HP21 8BY
+HA15,7,"7 Clover Lane, Aylesbury",HP21 8DQ
+HA15,25,"25 Clover Lane, Aylesbury",HP21 8DQ
+HA15,31,"31 Clover Lane, Aylesbury",HP21 8DQ
+HA15,53,"53 Birch Court, Aylesbury",HP21 8DS
+HA15,59,"59 Birch Court, Aylesbury",HP21 8DS
+HA15,74,"74 Thrasher Road, Aylesbury",HP21 8DX
+HA15,2,"2 Vicarage Road, Aylesbury",HP21 8EU
+HA15,8,"8 Vicarage Road, Aylesbury",HP21 8EU
+HA15,126,"126 Penn Road, Aylesbury",HP21 8JS
+HA15,128,"128 Penn Road, Aylesbury",HP21 8JS
+HA15,140,"140 Penn Road, Aylesbury",HP21 8JS
+HA15,144,"144 Penn Road, Aylesbury",HP21 8JS
+HA15,146,"146 Penn Road, Aylesbury",HP21 8JS
+HA15,4,"4 Montague Road, Aylesbury",HP21 8JT
+HA15,132,"132 Prebendal Avenue, Aylesbury",HP21 8LF
+HA15,134,"134 Prebendal Avenue, Aylesbury",HP21 8LF
+HA15,138,"138 Prebendal Avenue, Aylesbury",HP21 8LF
+HA15,140,"140 Prebendal Avenue, Aylesbury",HP21 8LF
+HA15,144,"144 Prebendal Avenue, Aylesbury",HP21 8LF
+HA15,15,"15 Oak Green, Aylesbury",HP21 8LJ
+HA15,59,"59 Paterson Road, Aylesbury",HP21 8LW
+HA15,37,"37 Thame Road, Aylesbury",HP21 8LX
+HA15,95,"95 Thame Road, Aylesbury",HP21 8LY
+HA15,3,"3 Edinburgh Place, Aylesbury",HP21 8NG
+HA15,52,"52 Carrington Road, Aylesbury",HP21 8NL
+HA15,9,"9 Hartwell End, Aylesbury",HP21 8NZ
+HA15,12,"12 Hartwell End, Aylesbury",HP21 8NZ
+HA15,21,"21 Hartwell End, Aylesbury",HP21 8PA
+HA15,64,"64 Lavric Road, Aylesbury",HP21 8PF
+HA15,8,"8 Cooks Lane Mursley, Milton Keynes",MK17 0RU
+HA15,47,"47 Green End Great Brickhill, Milton Keynes",MK17 9AT
+HA15,14,"14 Green End Great Brickhill, Milton Keynes",MK17 9AU
+HA15,63,"63 Bourtonville, Buckingham",MK18 1AY
+HA15,2,"2 Bath Lane Terrace, Buckingham",MK18 1DY
+HA15,3,"3 Bath Lane Terrace, Buckingham",MK18 1DY
+HA15,4,"4 Bath Lane Terrace, Buckingham",MK18 1DY
+HA15,3,"3 Westfields, Buckingham",MK18 1DZ
+HA15,5,"5 Westfields, Buckingham",MK18 1DZ
+HA15,6,"6 Westfields, Buckingham",MK18 1DZ
+HA15,8,"8 Westfields, Buckingham",MK18 1DZ
+HA15,10,"10 Westfields, Buckingham",MK18 1DZ
+HA15,13,"13 Westfields, Buckingham",MK18 1DZ
+HA15,14,"14 Westfields, Buckingham",MK18 1DZ
+HA15,15,"15 Westfields, Buckingham",MK18 1DZ
+HA15,18,"18 Westfields, Buckingham",MK18 1DZ
+HA15,19,"19 Westfields, Buckingham",MK18 1DZ
+HA15,20,"20 Westfields, Buckingham",MK18 1DZ
+HA15,21,"21 Westfields, Buckingham",MK18 1DZ
+HA15,24,"24 Westfields, Buckingham",MK18 1DZ
+HA15,27,"27 Westfields, Buckingham",MK18 1DZ
+HA15,28,"28 Westfields, Buckingham",MK18 1DZ
+HA15,29,"29 Westfields, Buckingham",MK18 1DZ
+HA15,31,"31 Westfields, Buckingham",MK18 1DZ
+HA15,32,"32 Westfields, Buckingham",MK18 1DZ
+HA15,35,"35 Westfields, Buckingham",MK18 1DZ
+HA15,49,"49 Westfields, Buckingham",MK18 1DZ
+HA15,51,"51 Westfields, Buckingham",MK18 1DZ
+HA15,53,"53 Westfields, Buckingham",MK18 1DZ
+HA15,55,"55 Westfields, Buckingham",MK18 1DZ
+HA15,57,"57 Westfields, Buckingham",MK18 1DZ
+HA15,60,"60 Westfields, Buckingham",MK18 1DZ
+HA15,2,"2 Grenville Road, Buckingham",MK18 1LR
+HA15,118,"118 Western Avenue, Buckingham",MK18 1LS
+HA15,5,"5 South Hall Maids Moreton, Buckingham",MK18 1QB
+HA15,2,"2 Church Close Maids Moreton, Buckingham",MK18 1QG
+HA15,5,"5 Church Close Maids Moreton, Buckingham",MK18 1QG
+HA15,7,"7 Church Close Maids Moreton, Buckingham",MK18 1QG
+HA15,1,"1 The Leys Main Street, Buckingham",MK18 1QT
+HA15,31a,"31a Springfields Padbury, Buckingham",MK18 2AT
+HA15,31b,"31b Springfields Padbury, Buckingham",MK18 2AT
+HA15,1,"1 Arnolds Close Padbury, Buckingham",MK18 2BG
+HA15,42,"42 Victory Road Steeple Claydon, Buckingham",MK18 2NY
+HA15,50,"50 Victory Road Steeple Claydon, Buckingham",MK18 2NY
+HA15,4,"4 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
+HA15,8,"8 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
+HA15,10,"10 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
+HA15,12,"12 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
+HA15,11,"11 Vicarage Lane Steeple Claydon, Buckingham",MK18 2PR
+HA15,62,"62 Vicarage Lane Steeple Claydon, Buckingham",MK18 2PR
+HA15,64,"64 Vicarage Lane Steeple Claydon, Buckingham",MK18 2PR
+HA15,3,"3 Pound Close Steeple Claydon, Buckingham",MK18 2QL
+HA15,4,"4 Pound Close Steeple Claydon, Buckingham",MK18 2QL
+HA15,6,"6 Oak Leys Steeple Claydon, Buckingham",MK18 2RQ
+HA15,8,"8 Oak Leys Steeple Claydon, Buckingham",MK18 2RQ
+HA15,8,"8 Old Mill Furlong Winslow, Buckingham",MK18 3EX
+HA15,23,"23 Old Mill Furlong Winslow, Buckingham",MK18 3EX
+HA15,24,"24 Old Mill Furlong Winslow, Buckingham",MK18 3EX
+HA15,25,"25 Old Mill Furlong Winslow, Buckingham",MK18 3EX
+HA15,30,"30 Old Mill Furlong Winslow, Buckingham",MK18 3EX
+HA15,32,"32 Old Mill Furlong Winslow, Buckingham",MK18 3EX
+HA15,34,"34 Old Mill Furlong Winslow, Buckingham",MK18 3EX
+HA15,1,"1 Roberts Road Haddenham, Aylesbury",HP17 8HH
+HA15,6,"6 Roberts Road Haddenham, Aylesbury",HP17 8HH
+HA15,11,"11 Roberts Road Haddenham, Aylesbury",HP17 8HH
+HA15,15,"15 Roberts Road Haddenham, Aylesbury",HP17 8HH
+HA15,17,"17 Roberts Road Haddenham, Aylesbury",HP17 8HH
+HA15,18,"18 Roberts Road Haddenham, Aylesbury",HP17 8HH
+HA15,38,"38 Roberts Road Haddenham, Aylesbury",HP17 8HH
+HA15,3,"3 Harts Road Haddenham, Aylesbury",HP17 8HJ
+HA15,9,"9 Harts Road Haddenham, Aylesbury",HP17 8HJ
+HA15,11,"11 Harts Road Haddenham, Aylesbury",HP17 8HJ
+HA15,16,"16 Harts Road Haddenham, Aylesbury",HP17 8HJ
+HA15,18,"18 Harts Road Haddenham, Aylesbury",HP17 8HJ
+HA15,22,"22 Harts Road Haddenham, Aylesbury",HP17 8HJ
+HA15,2,"2 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,4,"4 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,5,"5 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,8,"8 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,20,"20 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,21,"21 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,22,"22 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,26,"26 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,29,"29 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,31,"31 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,33,"33 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,35,"35 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,37,"37 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,39,"39 Willis Road Haddenham, Aylesbury",HP17 8HL
+HA15,5,"5 Woodways Haddenham, Aylesbury",HP17 8HW
+HA15,7,"7 Woodways Haddenham, Aylesbury",HP17 8HW
+HA15,13,"13 Woodways Haddenham, Aylesbury",HP17 8HW
+HA15,19,"19 Woodways Haddenham, Aylesbury",HP17 8HW
+HA15,1,"1 Woodlands Butte Furlong, Aylesbury",HP17 8JE
+HA15,2,"2 Franklin Road Haddenham, Aylesbury",HP17 8LE
+HA15,8,"8 Franklin Road Haddenham, Aylesbury",HP17 8LE
+HA15,129,"129 Churchway Haddenham, Aylesbury",HP17 8LG
+HA15,133,"133 Churchway Haddenham, Aylesbury",HP17 8LG
+HA15,135,"135 Churchway Haddenham, Aylesbury",HP17 8LG
+HA15,147,"147 Churchway Haddenham, Aylesbury",HP17 8LG
+HA15,7,"7 Bishopstone Road Stone, Aylesbury",HP17 8QX
+HA15,33,"33 Bishopstone Road Stone, Aylesbury",HP17 8QX
+HA15,8,"8 Chiltern Avenue Stone, Aylesbury",HP17 8QY
+HA15,20,"20 Chiltern Avenue Stone, Aylesbury",HP17 8QY
+HA15,28,"28 Chiltern Avenue Stone, Aylesbury",HP17 8QY
+HA15,32,"32 Chiltern Avenue Stone, Aylesbury",HP17 8QY
+HA15,34,"34 Chiltern Avenue Stone, Aylesbury",HP17 8QY
+HA15,46,"46 Chiltern Avenue Stone, Aylesbury",HP17 8QY
+HA15,60,"60 Chiltern Avenue Stone, Aylesbury",HP17 8QY
+HA15,62,"62 Chiltern Avenue Stone, Aylesbury",HP17 8QY
+HA15,7,"7 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
+HA15,13,"13 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
+HA15,33,"33 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
+HA15,41,"41 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
+HA15,14,"14 Chiltern Close Stone, Aylesbury",HP17 8RA
+HA15,17,"17 Chiltern Close Stone, Aylesbury",HP17 8RA
+HA15,10,"10 Round Hill Stone, Aylesbury",HP17 8RD
+HA15,16,"16 Round Hill Stone, Aylesbury",HP17 8RD
+HA15,7,"7 Round Hill Stone, Aylesbury",HP17 8RE
+HA15,17,"17 Round Hill Stone, Aylesbury",HP17 8RE
+HA15,23,"23 Round Hill Stone, Aylesbury",HP17 8RE
+HA15,59,"59 Bishopstone Road Stone, Aylesbury",HP17 8RX
+HA15,1,"1 Bittenham Close Stone, Aylesbury",HP17 8RY
+HA15,7,"7 Bittenham Close Stone, Aylesbury",HP17 8RY
+HA15,1,"1 New Road Dinton, Aylesbury",HP17 8UU
+HA15,3,"3 New Road Dinton, Aylesbury",HP17 8UU
+HA15,8,"8 New Road Dinton, Aylesbury",HP17 8UU
+HA15,1,"1 Bernard Close Cuddington, Aylesbury",HP18 0AJ
+HA15,4,"4 Bernard Close Cuddington, Aylesbury",HP18 0AJ
+HA15,7,"7 Bernard Close Cuddington, Aylesbury",HP18 0AJ
+HA15,12,"12 Bernard Close Cuddington, Aylesbury",HP18 0AJ
+HA15,19,"19 Bernard Close Cuddington, Aylesbury",HP18 0AJ
+HA15,22,"22 Bernard Close Cuddington, Aylesbury",HP18 0AJ
+HA15,34,"34 Bernard Close Cuddington, Aylesbury",HP18 0AJ
+HA15,39,"39 Bernard Close Cuddington, Aylesbury",HP18 0AJ
+HA15,41,"41 Bernard Close Cuddington, Aylesbury",HP18 0AJ
+HA15,7,"7 Hillside Cottages Dadbrook, Aylesbury",HP18 0AQ
+HA15,10,"10 Hillside Cottages Dadbrook, Aylesbury",HP18 0AQ
+HA15,11,"11 Hillside Cottages Dadbrook, Aylesbury",HP18 0AQ
+HA15,7,"7 Swan Hill Aylesbury Road, Aylesbury",HP18 0BE
+HA15,10,"10 Swan Hill Aylesbury Road, Aylesbury",HP18 0BE
+HA15,1,"1 Grove Way Waddesdon, Aylesbury",HP18 0LH
+HA15,6,"6 Grove Way Waddesdon, Aylesbury",HP18 0LH
+HA15,7,"7 Grove Way Waddesdon, Aylesbury",HP18 0LH
+HA15,1,"1 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
+HA15,2,"2 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
+HA15,3,"3 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
+HA15,5,"5 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
+HA15,6,"6 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
+HA15,7,"7 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
+HA15,9,"9 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
+HA15,21,"21 Goss Avenue Waddesdon, Aylesbury",HP18 0LY
+HA15,86,"86 Sharps Close Waddesdon, Aylesbury",HP18 0LZ
+HA15,88,"88 Sharps Close Waddesdon, Aylesbury",HP18 0LZ
+HA15,3,"3 Hilltop Long Crendon, Aylesbury",HP18 9AT
+HA15,4,"4 Hilltop Long Crendon, Aylesbury",HP18 9AT
+HA15,1A,"1a Hilltop Long Crendon, Aylesbury",HP18 9AT
+HA15,3A,"3a Hilltop Long Crendon, Aylesbury",HP18 9AT
+HA15,26,"26 Peascroft Long Crendon, Aylesbury",HP18 9AU
+HA15,30,"30 Peascroft Long Crendon, Aylesbury",HP18 9AU
+HA15,52,"52 Peascroft Long Crendon, Aylesbury",HP18 9AU
+HA15,11,"11 Harroell Long Crendon, Aylesbury",HP18 9AY
+HA15,13,"13 Harroell Long Crendon, Aylesbury",HP18 9AY
+HA15,14,"14 Harroell Long Crendon, Aylesbury",HP18 9AY
+HA15,2,"2 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
+HA15,14,"14 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
+HA15,18,"18 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
+HA15,26,"26 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
+HA15,5,"5 Meadowbank Close Long Crendon, Aylesbury",HP18 9DH
+HA15,11,"11 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
+HA15,14,"14 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
+HA15,16,"16 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
+HA15,26,"26 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
+HA15,28,"28 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
+HA15,29,"29 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
+HA15,30,"30 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
+HA15,32,"32 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
+HA15,36,"36 Giffard Way Long Crendon, Aylesbury",HP18 9DN
+HA15,45,"45 Giffard Way Long Crendon, Aylesbury",HP18 9DN
+HA15,52,"52 Giffard Way Long Crendon, Aylesbury",HP18 9DN
+HA15,10,"10 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
+HA15,11,"11 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
+HA15,12,"12 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
+HA15,14,"14 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
+HA15,16,"16 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
+HA15,22,"22 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
+HA15,25,"25 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
+HA15,26,"26 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
+HA15,27,"27 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
+HA15,32,"32 Friars Furlong Long Crendon, Aylesbury",HP18 9DQ
+HA15,4,"4 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,5,"5 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,8,"8 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,9,"9 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,10,"10 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,11,"11 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,14,"14 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,17,"17 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,18,"18 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,20,"20 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,23,"23 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,24,"24 Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,14b,"14b Highfield Long Crendon, Aylesbury",HP18 9DR
+HA15,4,"4 Giffard Way Long Crendon, Aylesbury",HP18 9DW
+HA15,13,"13 Giffard Way Long Crendon, Aylesbury",HP18 9DW
+HA15,14,"14 Giffard Way Long Crendon, Aylesbury",HP18 9DW
+HA15,24,"24 St. Annes Road, Aylesbury",HP19 7RB
+HA15,55,"55 St. Annes Road, Aylesbury",HP19 7RB
+HA15,6,"6 Palmer Avenue, Aylesbury",HP19 8EF
+HA15,18,"18 Palmer Avenue, Aylesbury",HP19 8EF
+HA15,20,"20 Palmer Avenue, Aylesbury",HP19 8EF
+HA15,24,"24 Palmer Avenue, Aylesbury",HP19 8EF
+HA15,25,"25 Palmer Avenue, Aylesbury",HP19 8EF
+HA15,1,"1 Gatehouse Road, Aylesbury",HP19 8EH
+HA15,10,"10 Gatehouse Road, Aylesbury",HP19 8EH
+HA15,12,"12 Gatehouse Road, Aylesbury",HP19 8EH
+HA15,53,"53 Oxford Road, Aylesbury",HP19 8EQ
+HA15,59,"59 Oxford Road, Aylesbury",HP19 8EQ
+HA15,2,"2 Lander Road,Aylesbury,Bucks",HP19 9TT
+HA15,30,"30 Lander Road,Aylesbury,Bucks",HP19 9TT
+HA15,31,"31 Lander Road,Aylesbury,Bucks",HP19 9TT
+HA15,32,"32 Lander Road,Aylesbury,Bucks",HP19 9TT
+HA15,3,"3 Eeles Close,Aylesbury,Bucks",HP19 9TU
+HA15,5,"5 Eeles Close,Aylesbury,Bucks",HP19 9TU
+HA15,6,"6 Eeles Close,Aylesbury,Bucks",HP19 9TU
+HA15,7,"7 Eeles Close,Aylesbury,Bucks",HP19 9TU
+HA15,8,"8 Eeles Close,Aylesbury,Bucks",HP19 9TU
+HA15,9,"9 Eeles Close,Aylesbury,Bucks",HP19 9TU
+HA15,10,"10 Eeles Close,Aylesbury,Bucks",HP19 9TU
+HA15,15,"15 Eeles Close,Aylesbury,Bucks",HP19 9TU
+HA15,17,"17 Dicks Way,Aylesbury,Bucks",HP19 9UA
+HA15,20,"20 Dicks Way,Aylesbury,Bucks",HP19 9UA
+HA15,28,"28 Dicks Way,Aylesbury,Bucks",HP19 9UA
+HA15,30,"30 Dicks Way,Aylesbury,Bucks",HP19 9UA
+HA15,32,"32 Dicks Way,Aylesbury,Bucks",HP19 9UA
+HA15,34,"34 Dicks Way,Aylesbury,Bucks",HP19 9UA
+HA15,36,"36 Dicks Way,Aylesbury,Bucks",HP19 9UA
+HA15,7,"7 Fletcher Close,Aylesbury,Bucks",HP19 9UB
+HA15,8,"8 Fletcher Close,Aylesbury,Bucks",HP19 9UB
+HA15,10,"10 Fletcher Close,Aylesbury,Bucks",HP19 9UB
+HA15,11,"11 Fletcher Close,Aylesbury,Bucks",HP19 9UB
+HA15,12,"12 Fletcher Close,Aylesbury,Bucks",HP19 9UB
+HA15,25,"25 Fletcher Close,Aylesbury,Bucks",HP19 9UB
+HA15,33,"33 Fletcher Close,Aylesbury,Bucks",HP19 9UB
+HA15,34,"34 Fletcher Close,Aylesbury,Bucks",HP19 9UB
+HA15,11,"11 Grimmer Close,Aylesbury,Bucks",HP19 9UD
+HA15,14,"14 Grimmer Close,Aylesbury,Bucks",HP19 9UD
+HA15,15,"15 Grimmer Close,Aylesbury,Bucks",HP19 9UD
+HA15,23,"23 Grimmer Close,Aylesbury,Bucks",HP19 9UD
+HA15,12,"12 Vincent Road,Aylesbury,Bucks",HP19 9UN
+HA15,4,"4 Reading Close,Aylesbury,Bucks",HP19 9UW
+HA15,7,"7 Reading Close,Aylesbury,Bucks",HP19 9UW
+HA15,10,"10 Reading Close,Aylesbury,Bucks",HP19 9UW
+HA15,2,"2 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,4,"4 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,6,"6 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,8,"8 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,10,"10 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,14,"14 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,16,"16 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,18,"18 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,20,"20 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,22,"22 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,24,"24 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
+HA15,1,"1 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
+HA15,3,"3 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
+HA15,5,"5 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
+HA15,7,"7 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
+HA15,9,"9 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
+HA15,11,"11 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
+HA15,13,"13 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
+HA15,15,"15 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
+HA15,17,"17 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
+HA15,24,"24 St. Annes Road, Aylesbury",HP19 7RB
+HA15,55,"55 St. Annes Road, Aylesbury",HP19 7RB
+HA15,3,"3 Lansdowne Road, Aylesbury",HP20 2DJ
+HA15,15,"15 Lansdowne Road, Aylesbury",HP20 2DJ
+HA15,28,"28 Beechwood Way Aston Clinton, Aylesbury",HP22 5JP
+HA15,11,"11 Lower Icknield Way Aston Clinton, Aylesbury",HP22 5JS
+HA15,17,"17 Lower Icknield Way Aston Clinton, Aylesbury",HP22 5JS
+HA15,5,"5 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,6,"6 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,8,"8 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,12,"12 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,13,"13 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,15,"15 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,16,"16 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,19,"19 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,21,"21 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,23,"23 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
+HA15,13,"13 Beechwood Way Aston Clinton, Aylesbury",HP22 5JW
+HA15,24,"24 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
+HA15,26,"26 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
+HA15,34,"34 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
+HA15,39,"39 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
+HA15,42,"42 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
+HA15,44,"44 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
+HA15,45,"45 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
+HA15,89,"89 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
+HA15,9,"9 Longcroft Aston Clinton, Aylesbury",HP22 5JZ
+HA15,14,"14 Longcroft Aston Clinton, Aylesbury",HP22 5JZ
+HA15,55,"55 Grenville Avenue Wendover, Aylesbury",HP22 6AJ
+HA15,67,"67 Grenville Avenue Wendover, Aylesbury",HP22 6AJ
+HA15,75,"75 Grenville Avenue Wendover, Aylesbury",HP22 6AJ
+HA15,35,"35 Grenville Avenue Wendover, Aylesbury",HP22 6AQ
+HA15,12,"12 Boddington Road Wendover, Aylesbury",HP22 6HY
+HA15,16,"16 Boddington Road Wendover, Aylesbury",HP22 6HY
+HA15,21,"21 Boddington Road Wendover, Aylesbury",HP22 6HY
+HA15,35,"35 Boddington Road Wendover, Aylesbury",HP22 6HY
+HA15,39,"39 Boddington Road Wendover, Aylesbury",HP22 6HY
+HA15,5,"5 Boddington Road Wendover, Aylesbury",HP22 6HZ
+HA15,1,"1a Lionel Avenue Wendover, Aylesbury",HP22 6LL
+HA15,22,"22 Barley Close Weston Turville, Aylesbury",HP22 5SF
+HA15,24,"24 Barley Close Weston Turville, Aylesbury",HP22 5SF
+HA15,31,"31 Barley Close Weston Turville, Aylesbury",HP22 5SF
+HA15,39,"39 Barley Close Weston Turville, Aylesbury",HP22 5SF
+HA15,41,"41 Barley Close Weston Turville, Aylesbury",HP22 5SF
+HA15,43,"43 Barley Close Weston Turville, Aylesbury",HP22 5SF
+HA15,46,"46 Hampden Road Stoke Mandeville, Aylesbury",HP22 5TW
+HA15,6,"6 Hampden Road Stoke Mandeville, Aylesbury",HP22 5UF
+HA15,7,"7 Hampden Road Stoke Mandeville, Aylesbury",HP22 5UF
+HA15,21,"21 Hampden Road Stoke Mandeville, Aylesbury",HP22 5UF
+HA15,14,"14 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
+HA15,15,"15 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
+HA15,18,"18 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
+HA15,20,"20 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
+HA15,23,"23 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
+HA15,43,"43 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
+HA15,44,"44 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
+HA15,27,"27 Station Road Stoke Mandeville, Aylesbury",HP22 5UL
+HA15,29,"29 Station Road Stoke Mandeville, Aylesbury",HP22 5UL
+HA15,3,"3 Moor Park Wendover, Aylesbury",HP22 6AX
+HA15,9,"9 Moor Park Wendover, Aylesbury",HP22 6AX
+HA15,21,"21 Moor Park Wendover, Aylesbury",HP22 6AX
+HA15,35,"35 Moor Park Wendover, Aylesbury",HP22 6AX
+HA15,40,"40 Moor Park Wendover, Aylesbury",HP22 6AX
+HA15,42,"42 Moor Park Wendover, Aylesbury",HP22 6AX
+HA15,45,"45 Moor Park Wendover, Aylesbury",HP22 6AX
+HA15,48,"48 Moor Park Wendover, Aylesbury",HP22 6AX
+HA15,54,"54 Moor Park Wendover, Aylesbury",HP22 6AX
+HA15,58,"58 Moor Park Wendover, Aylesbury",HP22 6AX
--- a/etl/eligibility/ha_15_32/HA
+++ b/etl/eligibility/ha_15_32/HA
@ -0,0 +1,499 @@
+Housing Association,No.,Address,Postcode
+HA 32,1,SHERWOOD COURT,HU114DF
+HA 32,2,SHERWOOD COURT,HU114DF
+HA 32,3,SHERWOOD COURT,HU114DF
+HA 32,4,SHERWOOD COURT,HU114DF
+HA 32,5,SHERWOOD COURT,HU114DF
+HA 32,7,SHERWOOD COURT,HU114DF
+HA 32,8,SHERWOOD COURT,HU114DF
+HA 32,9,SHERWOOD COURT,HU114DF
+HA 32,10,SHERWOOD COURT,HU114DF
+HA 32,27,Seaton Grove,HU4 6HF
+HA 32,29,Seaton Grove,HU4 6HF
+HA 32,31,Seaton Grove,HU4 6HF
+HA 32,33,Seaton Grove,HU4 6HF
+HA 32,35,Seaton Grove,HU4 6HF
+HA 32,39,Seaton Grove,HU4 6HF
+HA 32,41,Seaton Grove,HU4 6HF
+HA 32,43,Seaton Grove,HU4 6HF
+HA 32,7,Norton Grove,HU4 6HG
+HA 32,9,Norton Grove,HU4 6HG
+HA 32,11,Norton Grove,HU4 6HG
+HA 32,15,Norton Grove,HU4 6HG
+HA 32,17,Norton Grove,HU4 6HG
+HA 32,19,Norton Grove,HU4 6HG
+HA 32,21,Norton Grove,HU4 6HG
+HA 32,28,Coxwold,HU4 6HH
+HA 32,30,Coxwold,HU4 6HH
+HA 32,32,Coxwold,HU4 6HH
+HA 32,34,Coxwold,HU4 6HH
+HA 32,36,Coxwold,HU4 6HH
+HA 32,38,Coxwold,HU4 6HH
+HA 32,40,Coxwold,HU4 6HH
+HA 32,42,Coxwold,HU4 6HH
+HA 32,44,Coxwold,HU4 6HH
+HA 32,971,HESSLE ROAD,HU4 6QG
+HA 32,973,HESSLE ROAD,HU4 6QG
+HA 32,975,HESSLE ROAD,HU4 6QG
+HA 32,977,HESSLE ROAD,HU4 6QG
+HA 32,981,HESSLE ROAD,HU4 6QG
+HA 32,983,HESSLE ROAD,HU4 6QG
+HA 32,1,Hessle Road,HU4 6RS
+HA 32,2,Hessle Road,HU4 6RS
+HA 32,3,Hessle Road,HU4 6RS
+HA 32,4,Hessle Road,HU4 6RS
+HA 32,5,Hessle Road,HU4 6RS
+HA 32,6,Hessle Road,HU4 6RS
+HA 32,7,Hessle Road,HU4 6RS
+HA 32,8,Hessle Road,HU4 6RS
+HA 32,9,Hessle Road,HU4 6RS
+HA 32,10,Hessle Road,HU4 6RS
+HA 32,11,Hessle Road,HU4 6RS
+HA 32,12,Hessle Road,HU4 6RS
+HA 32,14,Hessle Road,HU4 6RS
+HA 32,15,Hessle Road,HU4 6RS
+HA 32,16,Hessle Road,HU4 6RS
+HA 32,17,Hessle Road,HU4 6RS
+HA 32,18,Hessle Road,HU4 6RS
+HA 32,19,Hessle Road,HU4 6RS
+HA 32,20,Hessle Road,HU4 6RS
+HA 32,21,Hessle Road,HU4 6RS
+HA 32,22,Hessle Road,HU4 6RS
+HA 32,23,Hessle Road,HU4 6RS
+HA 32,24,Hessle Road,HU4 6RS
+HA 32,25,Hessle Road,HU4 6RS
+HA 32,26,Hessle Road,HU4 6RS
+HA 32,27,Hessle Road,HU4 6RS
+HA 32,28,Hessle Road,HU4 6RS
+HA 32,29,Hessle Road,HU4 6RS
+HA 32,30,Hessle Road,HU4 6RS
+HA 32,31,Hessle Road,HU4 6RS
+HA 32,32,Hessle Road,HU4 6RS
+HA 32,33,Hessle Road,HU4 6RS
+HA 32,34,Hessle Road,HU4 6RS
+HA 32,35,Hessle Road,HU4 6RS
+HA 32,36,Hessle Road,HU4 6RS
+HA 32,37,Hessle Road,HU4 6RS
+HA 32,46,FORESTER  WAY,HU4 6SR
+HA 32,48,FORESTER  WAY,HU4 6SR
+HA 32,50,FORESTER  WAY,HU4 6SR
+HA 32,54,FORESTER  WAY,HU4 6SR
+HA 32,56,FORESTER  WAY,HU4 6SR
+HA 32,62,FORESTER  WAY,HU4 6SR
+HA 32,64,FORESTER  WAY,HU4 6SR
+HA 32,66,FORESTER  WAY,HU4 6SR
+HA 32,68,FORESTER  WAY,HU4 6SR
+HA 32,70,FORESTER  WAY,HU4 6SR
+HA 32,15,SUMMERGROVES WAY,HU4 6SZ
+HA 32,1,WALNUT TREE WAY,HU4 6TG
+HA 32,2,WALNUT TREE WAY,HU4 6TG
+HA 32,3,WALNUT TREE WAY,HU4 6TG
+HA 32,4,WALNUT TREE WAY,HU4 6TG
+HA 32,7,WALNUT TREE WAY,HU4 6TG
+HA 32,8,WALNUT TREE WAY,HU4 6TG
+HA 32,9,WALNUT TREE WAY,HU4 6TG
+HA 32,291,Cottingham Road,HU5 4AT
+HA 32,293,Cottingham Road,HU5 4AT
+HA 32,295,Cottingham Road,HU5 4AT
+HA 32,297,Cottingham Road,HU5 4AT
+HA 32,299,Cottingham Road,HU5 4AT
+HA 32,301,Cottingham Road,HU5 4AT
+HA 32,303,Cottingham Road,HU5 4AT
+HA 32,305,Cottingham Road,HU5 4AT
+HA 32,307,Cottingham Road,HU5 4AT
+HA 32,309,Cottingham Road,HU5 4AT
+HA 32,1,Edith  Cavell Court,HU5 4BA
+HA 32,2,Edith  Cavell Court,HU5 4BA
+HA 32,3,Edith  Cavell Court,HU5 4BA
+HA 32,4,Edith  Cavell Court,HU5 4BA
+HA 32,5,Edith  Cavell Court,HU5 4BA
+HA 32,6,Edith  Cavell Court,HU5 4BA
+HA 32,7,Edith  Cavell Court,HU5 4BA
+HA 32,8,Edith  Cavell Court,HU5 4BA
+HA 32,9,Edith  Cavell Court,HU5 4BA
+HA 32,10,Edith  Cavell Court,HU5 4BA
+HA 32,11,Edith  Cavell Court,HU5 4BA
+HA 32,12,Edith  Cavell Court,HU5 4BA
+HA 32,106,Barringhton Avenue,HU5 4BE
+HA 32,112,Barringhton Avenue,HU5 4BE
+HA 32,114,Barringhton Avenue,HU5 4BE
+HA 32,116,Barringhton Avenue,HU5 4BE
+HA 32,118,Barringhton Avenue,HU5 4BE
+HA 32,120,Barringhton Avenue,HU5 4BE
+HA 32,122,Barringhton Avenue,HU5 4BE
+HA 32,124,Barringhton Avenue,HU5 4BE
+HA 32,126,Barringhton Avenue,HU5 4BE
+HA 32,1,Florence Nightingale Court,HU5 4BW
+HA 32,2,Florence Nightingale Court,HU5 4BW
+HA 32,3,Florence Nightingale Court,HU5 4BW
+HA 32,4,Florence Nightingale Court,HU5 4BW
+HA 32,5,Florence Nightingale Court,HU5 4BW
+HA 32,6,Florence Nightingale Court,HU5 4BW
+HA 32,7,Florence Nightingale Court,HU5 4BW
+HA 32,8,Florence Nightingale Court,HU5 4BW
+HA 32,9,Florence Nightingale Court,HU5 4BW
+HA 32,10,Florence Nightingale Court,HU5 4BW
+HA 32,11,Florence Nightingale Court,HU5 4BW
+HA 32,12,Florence Nightingale Court,HU5 4BW
+HA 32,14,Florence Nightingale Court,HU5 4BW
+HA 32,15,Florence Nightingale Court,HU5 4BW
+HA 32,17,Florence Nightingale Court,HU5 4BW
+HA 32,19,Florence Nightingale Court,HU5 4BW
+HA 32,12,Green Close,HU6 8DA
+HA 32,44,Green Close,HU6 8DA
+HA 32,49,Green Close,HU6 8DA
+HA 32,50,Green Close,HU6 8DA
+HA 32,14,Ashbury Court,HU6 8DY
+HA 32,38,Westgarth Avenue,HU6 8LS
+HA 32,46,WESTGARTH AVENUE,HU6 8LS
+HA 32,48,WESTGARTH AVENUE,HU6 8LS
+HA 32,54,Westgarth Avenue,HU6 8LS
+HA 32,10,BEAUTIMAN COURT,HU6 8LX
+HA 32,1,Rosey Row,HU9 1HF
+HA 32,2,Rosey Row,HU9 1HF
+HA 32,3,Rosey Row,HU9 1HF
+HA 32,4,Rosey Row,HU9 1HF
+HA 32,5,Rosey Row,HU9 1HF
+HA 32,6,Rosey Row,HU9 1HF
+HA 32,7,Rosey Row,HU9 1HF
+HA 32,8,Rosey Row,HU9 1HF
+HA 32,9,Rosey Row,HU9 1HF
+HA 32,10,Rosey Row,HU9 1HF
+HA 32,11,Rosey Row,HU9 1HF
+HA 32,12,Rosey Row,HU9 1HF
+HA 32,14,Rosey Row,HU9 1HF
+HA 32,15,Rosey Row,HU9 1HF
+HA 32,16,Rosey Row,HU9 1HF
+HA 32,17,Rosey Row,HU9 1HF
+HA 32,18,Rosey Row,HU9 1HF
+HA 32,19,Rosey Row,HU9 1HF
+HA 32,20,Rosey Row,HU9 1HF
+HA 32,21,Rosey Row,HU9 1HF
+HA 32,24,Steynburg Street,HU9 2PF
+HA 32,26,Steynburg Street,HU9 2PF
+HA 32,28,Steynburg Street,HU9 2PF
+HA 32,30,Steynburg Street,HU9 2PF
+HA 32,36,Steynburg Street,HU9 2PF
+HA 32,38,Steynburg Street,HU9 2PF
+HA 32,40,Steynburg Street,HU9 2PF
+HA 32,42,Steynburg Street,HU9 2PF
+HA 32,19,Rustenburg,HU9 2PT
+HA 32,21,Rustenburg,HU9 2PT
+HA 32,23,Rustenburg,HU9 2PT
+HA 32,25,Rustenburg,HU9 2PT
+HA 32,27,Rustenburg,HU9 2PT
+HA 32,29,Rustenburg,HU9 2PT
+HA 32,31,Rustenburg,HU9 2PT
+HA 32,33,Rustenburg,HU9 2PT
+HA 32,35,Rustenburg,HU9 2PT
+HA 32,37,Rustenburg,HU9 2PT
+HA 32,55,Rustenburg,HU9 2PT
+HA 32,57,Rustenburg,HU9 2PT
+HA 32,59,Rustenburg,HU9 2PT
+HA 32,61,Rustenburg,HU9 2PT
+HA 32,3,The Broadway,HU9 3JH
+HA 32,5,THE BROADWAY,HU9 3JH
+HA 32,7,The Broadway,HU9 3JH
+HA 32,9,The Broadway,HU9 3JH
+HA 32,11,The Broadway,HU9 3JH
+HA 32,1,BOWLING CIRCLE,HU9 3JL
+HA 32,3,BOWLING CIRCLE,HU9 3JL
+HA 32,5,BOWLING CIRCLE,HU9 3JL
+HA 32,7,BOWLING CIRCLE,HU9 3JL
+HA 32,9,BOWLING CIRCLE,HU9 3JL
+HA 32,1,MAJESTIC COURT,HU9 3JY
+HA 32,2,MAJESTIC COURT,HU9 3JY
+HA 32,3,MAJESTIC COURT,HU9 3JY
+HA 32,4,MAJESTIC COURT,HU9 3JY
+HA 32,5,MAJESTIC COURT,HU9 3JY
+HA 32,6,MAJESTIC COURT,HU9 3JY
+HA 32,7,MAJESTIC COURT,HU9 3JY
+HA 32,8,MAJESTIC COURT,HU9 3JY
+HA 32,9,MAJESTIC COURT,HU9 3JY
+HA 32,10,MAJESTIC COURT,HU9 3JY
+HA 32,11,MAJESTIC COURT,HU9 3JY
+HA 32,12,MAJESTIC COURT,HU9 3JY
+HA 32,14,MAJESTIC COURT,HU9 3JY
+HA 32,15,Majestic Court,HU9 3JY
+HA 32,16,MAJESTIC COURT,HU9 3JY
+HA 32,1,ROYALE COURT,HU9 3JZ
+HA 32,2,ROYALE COURT,HU9 3JZ
+HA 32,3,ROYALE COURT,HU9 3JZ
+HA 32,4,ROYALE COURT,HU9 3JZ
+HA 32,5,ROYALE COURT,HU9 3JZ
+HA 32,6,ROYALE COURT,HU9 3JZ
+HA 32,7,ROYALE COURT,HU9 3JZ
+HA 32,8,ROYALE COURT,HU9 3JZ
+HA 32,9,ROYALE COURT,HU9 3JZ
+HA 32,10,ROYALE COURT,HU9 3JZ
+HA 32,11,ROYALE COURT,HU9 3JZ
+HA 32,12,ROYALE COURT,HU9 3JZ
+HA 32,14,ROYALE COURT,HU9 3JZ
+HA 32,16,ROYALE COURT,HU9 3JZ
+HA 32,17,ROYALE COURT,HU9 3JZ
+HA 32,18,ROYALE COURT,HU9 3JZ
+HA 32,19,ROYALE COURT,HU9 3JZ
+HA 32,20,ROYALE COURT,HU9 3JZ
+HA 32,21,ROYALE COURT,HU9 3JZ
+HA 32,22,ROYALE COURT,HU9 3JZ
+HA 32,23,ROYALE COURT,HU9 3JZ
+HA 32,24,ROYALE COURT,HU9 3JZ
+HA 32,25,ROYALE COURT,HU9 3JZ
+HA 32,26,ROYALE COURT,HU9 3JZ
+HA 32,12A,ROYALE COURT,HU9 3JZ
+HA 32,79,MAYBURY ROAD,HU9 3LB
+HA 32,1,HEBRIDES CLOSE,HU9 3LF
+HA 32,2,HEBRIDES CLOSE,HU9 3LF
+HA 32,3,HEBRIDES CLOSE,HU9 3LF
+HA 32,4,HEBRIDES CLOSE,HU9 3LF
+HA 32,5,HEBRIDES CLOSE,HU9 3LF
+HA 32,6,HEBRIDES CLOSE,HU9 3LF
+HA 32,7,HEBRIDES CLOSE,HU9 3LF
+HA 32,8,HEBRIDES CLOSE,HU9 3LF
+HA 32,9,HEBRIDES CLOSE,HU9 3LF
+HA 32,10,HEBRIDES CLOSE,HU9 3LF
+HA 32,11,HEBRIDES CLOSE,HU9 3LF
+HA 32,14,Hebrides Close,HU9 3LF
+HA 32,15,HEBRIDES CLOSE,HU9 3LF
+HA 32,16,HEBRIDES CLOSE,HU9 3LF
+HA 32,17,HEBRIDES CLOSE,HU9 3LF
+HA 32,18,HEBRIDES CLOSE,HU9 3LF
+HA 32,19,HEBRIDES CLOSE,HU9 3LF
+HA 32,20,HEBRIDES CLOSE,HU9 3LF
+HA 32,21,HEBRIDES CLOSE,HU9 3LF
+HA 32,22,HEBRIDES CLOSE,HU9 3LF
+HA 32,23,HEBRIDES CLOSE,HU9 3LF
+HA 32,24,HEBRIDES CLOSE,HU9 3LF
+HA 32,25,HEBRIDES CLOSE,HU9 3LF
+HA 32,27,HEBRIDES CLOSE,HU9 3LF
+HA 32,28,HEBRIDES CLOSE,HU9 3LF
+HA 32,29,HEBRIDES CLOSE,HU9 3LF
+HA 32,30,HEBRIDES CLOSE,HU9 3LF
+HA 32,31,HEBRIDES CLOSE,HU9 3LF
+HA 32,32,HEBRIDES CLOSE,HU9 3LF
+HA 32,33,HEBRIDES CLOSE,HU9 3LF
+HA 32,34,HEBRIDES CLOSE,HU9 3LF
+HA 32,35,HEBRIDES CLOSE,HU9 3LF
+HA 32,36,HEBRIDES CLOSE,HU9 3LF
+HA 32,39,HEBRIDES CLOSE,HU9 3LF
+HA 32,40,HEBRIDES CLOSE,HU9 3LF
+HA 32,41,HEBRIDES CLOSE,HU9 3LF
+HA 32,42,HEBRIDES CLOSE,HU9 3LF
+HA 32,2,CROMARTY CLOSE,HU9 3LG
+HA 32,4,CROMARTY CLOSE,HU9 3LG
+HA 32,6,CROMARTY CLOSE,HU9 3LG
+HA 32,8,CROMARTY CLOSE,HU9 3LG
+HA 32,10,CROMARTY CLOSE,HU9 3LG
+HA 32,12,CROMARTY CLOSE,HU9 3LG
+HA 32,14,CROMARTY CLOSE,HU9 3LG
+HA 32,16,CROMARTY CLOSE,HU9 3LG
+HA 32,18,CROMARTY CLOSE,HU9 3LG
+HA 32,20,CROMARTY CLOSE,HU9 3LG
+HA 32,22,CROMARTY CLOSE,HU9 3LG
+HA 32,24,CROMARTY CLOSE,HU9 3LG
+HA 32,26,CROMARTY CLOSE,HU9 3LG
+HA 32,28,CROMARTY CLOSE,HU9 3LG
+HA 32,30,CROMARTY CLOSE,HU9 3LG
+HA 32,32,CROMARTY CLOSE,HU9 3LG
+HA 32,34,CROMARTY CLOSE,HU9 3LG
+HA 32,36,CROMARTY CLOSE,HU9 3LG
+HA 32,40,CROMARTY CLOSE,HU9 3LG
+HA 32,42,CROMARTY CLOSE,HU9 3LG
+HA 32,44,CROMARTY CLOSE,HU9 3LG
+HA 32,46,CROMARTY CLOSE,HU9 3LG
+HA 32,48,CROMARTY CLOSE,HU9 3LG
+HA 32,48,CROMARTY CLOSE,HU9 3LG
+HA 32,50,CROMARTY CLOSE,HU9 3LG
+HA 32,52,CROMARTY CLOSE,HU9 3LG
+HA 32,54,CROMARTY CLOSE,HU9 3LG
+HA 32,56,CROMARTY CLOSE,HU9 3LG
+HA 32,58,CROMARTY CLOSE,HU9 3LG
+HA 32,60,CROMARTY CLOSE,HU9 3LG
+HA 32,62,CROMARTY CLOSE,HU9 3LG
+HA 32,64,CROMARTY CLOSE,HU9 3LG
+HA 32,66,CROMARTY CLOSE,HU9 3LG
+HA 32,68,CROMARTY CLOSE,HU9 3LG
+HA 32,1,RONALDSWAY CLOSE,HU9 3LH
+HA 32,2,RONALDSWAY CLOSE,HU9 3LH
+HA 32,3,RONALDSWAY CLOSE,HU9 3LH
+HA 32,3,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
+HA 32,4,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
+HA 32,6,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
+HA 32,9,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
+HA 32,10,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
+HA 32,15,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
+HA 32,17,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
+HA 32,18,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
+HA 32,7,BROADWAY DRIVE,HU9 3PA
+HA 32,9,BROADWAY DRIVE,HU9 3PA
+HA 32,11,BROADWAY DRIVE,HU9 3PA
+HA 32,15,Broadway Drive,HU9 3PA
+HA 32,17,Broadway Drive,HU9 3PA
+HA 32,19,Broadway Drive,HU9 3PA
+HA 32,21,Broadway Drive,HU9 3PA
+HA 32,23,Broadway Drive,HU9 3PA
+HA 32,25,Broadway Drive,HU9 3PA
+HA 32,27,Broadway Drive,HU9 3PA
+HA 32,29,Broadway Drive,HU9 3PA
+HA 32,31,Broadway Drive,HU9 3PA
+HA 32,33,Broadway Drive,HU9 3PA
+HA 32,35,Broadway Drive,HU9 3PA
+HA 32,37,BROADWAY DRIVE,HU9 3PA
+HA 32,39,BROADWAY DRIVE,HU9 3PA
+HA 32,41,Broadway Drive,HU9 3PA
+HA 32,43,BROADWAY DRIVE,HU9 3PA
+HA 32,45,BROADWAY DRIVE,HU9 3PA
+HA 32,47,BROADWAY DRIVE,HU9 3PA
+HA 32,49,BROADWAY DRIVE,HU9 3PA
+HA 32,2,Broadway Drive,HU9 3PB
+HA 32,4,Broadway Drive,HU9 3PB
+HA 32,6,Broadway Drive,HU9 3PB
+HA 32,8,Broadway Drive,HU9 3PB
+HA 32,10,Broadway Drive,HU9 3PB
+HA 32,12,Broadway Drive,HU9 3PB
+HA 32,14,Broadway Drive,HU9 3PB
+HA 32,16,Broadway Drive,HU9 3PB
+HA 32,18,Broadway Drive,HU9 3PB
+HA 32,20,Broadway Drive,HU9 3PB
+HA 32,22,Broadway Drive,HU9 3PB
+HA 32,26,Broadway Drive,HU9 3PB
+HA 32,28,Broadway Drive,HU9 3PB
+HA 32,28,ADA HOLMES CIRCLE,HU9 3PB
+HA 32,30,Broadway Drive,HU9 3PB
+HA 32,32,Broadway Drive,HU9 3PB
+HA 32,34,Broadway Drive,HU9 3PB
+HA 32,36,Broadway Drive,HU9 3PB
+HA 32,38,Broadway Drive,HU9 3PB
+HA 32,40,Broadway Drive,HU9 3PB
+HA 32,42,Broadway Drive,HU9 3PB
+HA 32,44,Broadway Drive,HU9 3PB
+HA 32,46,Broadway Drive,HU9 3PB
+HA 32,48,Broadway Drive,HU9 3PB
+HA 32,52,Broadway Drive,HU9 3PB
+HA 32,56,Broadway Drive,HU9 3PB
+HA 32,58,Broadway Drive,HU9 3PB
+HA 32,60,Broadway Drive,HU9 3PB
+HA 32,55,RUTHERGLEN DRIVE,HU9 3PF
+HA 32,57,RUTHERGLEN DRIVE,HU9 3PF
+HA 32,59,RUTHERGLEN DRIVE,HU9 3PF
+HA 32,1,IMPERIAL COURT,HU9 3PG
+HA 32,3,IMPERIAL COURT,HU9 3PG
+HA 32,4,IMPERIAL COURT,HU9 3PG
+HA 32,5,IMPERIAL COURT,HU9 3PG
+HA 32,6,IMPERIAL COURT,HU9 3PG
+HA 32,7,IMPERIAL COURT,HU9 3PG
+HA 32,8,IMPERIAL COURT,HU9 3PG
+HA 32,9,IMPERIAL COURT,HU9 3PG
+HA 32,10,IMPERIAL COURT,HU9 3PG
+HA 32,10,SCHUBERT CLOSE,HU9 3PL
+HA 32,27,SCHUBERT CLOSE,HU9 3PL
+HA 32,28,SCHUBERT CLOSE,HU9 3PL
+HA 32,32,SCHUBERT CLOSE,HU9 3PL
+HA 32,1,Broadway Manor,HU9 3PN
+HA 32,1,Broadway Cottages,HU9 3PN
+HA 32,2,Broadway Manor,HU9 3PN
+HA 32,2,Broadway Cottages,HU9 3PN
+HA 32,3,Broadway Cottages,HU9 3PN
+HA 32,6,Broadway Manor,HU9 3PN
+HA 32,8,Broadway Manor,HU9 3PN
+HA 32,17,Broadway Manor,HU9 3PN
+HA 32,18,Broadway Manor,HU9 3PN
+HA 32,19,Broadway Manor,HU9 3PN
+HA 32,20,Broadway Manor,HU9 3PN
+HA 32,24,Broadway Manor,HU9 3PN
+HA 32,31,Broadway Manor,HU9 3PN
+HA 32,35,Broadway Manor,HU9 3PN
+HA 32,36,Broadway Manor,HU9 3PN
+HA 32,12A,Broadway Manor,HU9 3PN
+HA 32,1,FAROES CLOSE,HU9 4AN
+HA 32,2,Faroes Close,HU9 4AN
+HA 32,3,FAROES CLOSE,HU9 4AN
+HA 32,4,FAROES CLOSE,HU9 4AN
+HA 32,5,FAROES CLOSE,HU9 4AN
+HA 32,6,FAROES CLOSE,HU9 4AN
+HA 32,7,FAROES CLOSE,HU9 4AN
+HA 32,9,FAROES CLOSE,HU9 4AN
+HA 32,10,FAROES CLOSE,HU9 4AN
+HA 32,11,FAROES CLOSE,HU9 4AN
+HA 32,12,FAROES CLOSE,HU9 4AN
+HA 32,14,FAROES CLOSE,HU9 4AN
+HA 32,15,FAROES CLOSE,HU9 4AN
+HA 32,16,FAROES CLOSE,HU9 4AN
+HA 32,17,FAROES CLOSE,HU9 4AN
+HA 32,18,FAROES CLOSE,HU9 4AN
+HA 32,19,FAROES CLOSE,HU9 4AN
+HA 32,81,MAYBURY ROAD,HU93LB
+HA 32,1,ZIEGFELD COURT,HU93PH
+HA 32,2,ZIEGFELD COURT,HU93PH
+HA 32,3,ZIEGFELD COURT,HU93PH
+HA 32,4,ZIEGFELD COURT,HU93PH
+HA 32,5,ZIEGFELD COURT,HU93PH
+HA 32,6,ZIEGFELD COURT,HU93PH
+HA 32,7,ZIEGFELD COURT,HU93PH
+HA 32,8,ZIEGFELD COURT,HU93PH
+HA 32,9,ZIEGFELD COURT,HU93PH
+HA 32,1,GOLDEN COURT,HU93PJ
+HA 32,2,GOLDEN COURT,HU93PJ
+HA 32,3,GOLDEN COURT,HU93PJ
+HA 32,4,GOLDEN COURT,HU93PJ
+HA 32,5,GOLDEN COURT,HU93PJ
+HA 32,6,GOLDEN COURT,HU93PJ
+HA 32,7,GOLDEN COURT,HU93PJ
+HA 32,8,GOLDEN COURT,HU93PJ
+HA 32,10,GOLDEN COURT,HU93PJ
+HA 32,11,GOLDEN COURT,HU93PJ
+HA 32,12,GOLDEN COURT,HU93PJ
+HA 32,14,GOLDEN COURT,HU93PJ
+HA 32,15,GOLDEN COURT,HU93PJ
+HA 32,16,GOLDEN COURT,HU93PJ
+HA 32,17,GOLDEN COURT,HU93PJ
+HA 32,18,GOLDEN COURT,HU93PJ
+HA 32,19,GOLDEN COURT,HU93PJ
+HA 32,20,GOLDEN COURT,HU93PJ
+HA 32,22,GOLDEN COURT,HU93PJ
+HA 32,23,GOLDEN COURT,HU93PJ
+HA 32,24,GOLDEN COURT,HU93PJ
+HA 32,15,ROYALE COURT,HU9 3JZ
+HA 32,6,SHERWOOD COURT,HU114DF
+HA 32,979,HESSLE ROAD,HU4 6QG
+HA 32,985,HESSLE ROAD,HU4 6QG
+HA 32,2,BUSH CLOSE,HU4 6SP
+HA 32,11,BUSH CLOSE,HU4 6SP
+HA 32,16,BUSH CLOSE,HU4 6SP
+HA 32,52,FORESTER  WAY,HU4 6SR
+HA 32,72,FORESTER  WAY,HU4 6SR
+HA 32,74,FORESTER  WAY,HU4 6SR
+HA 32,3,SUMMERGROVES WAY,HU4 6SZ
+HA 32,5,WALNUT TREE WAY,HU4 6TG
+HA 32,6,WALNUT TREE WAY,HU4 6TG
+HA 32,417,Endike Lane,HU6 8AG
+HA 32,5,Ashbury Court,HU6 8DA
+HA 32,9,Ashbury Court,HU6 8DA
+HA 32,12,Ashbury Court,HU6 8DA
+HA 32,28,Green Close,HU6 8DA
+HA 32,34,Green Close,HU6 8DA
+HA 32,51,Green Close,HU6 8DA
+HA 32,259,Endike Lane,HU6 8DX
+HA 32,261,Endike Lane,HU6 8DX
+HA 32,17,Ashbury Court,HU6 8DY
+HA 32,20,Ashbury Court,HU6 8DY
+HA 32,30,Westgarth Avenue,HU6 8LS
+HA 32,45,Westgarth Avenue,HU6 8LS
+HA 32,65,Westgarth Avenue,HU6 8LS
+HA 32,12,BEAUTIMAN COURT,HU6 8LX
+HA 32,1,THE BROADWAY,HU9 3JH
+HA 32,12,HEBRIDES CLOSE,HU9 3LF
+HA 32,26,HEBRIDES CLOSE,HU9 3LF
+HA 32,37,HEBRIDES CLOSE,HU9 3LF
+HA 32,38,HEBRIDES CLOSE,HU9 3LF
+HA 32,24,Broadway Drive,HU9 3PB
+HA 32,50,Broadway Drive,HU9 3PB
+HA 32,54,Broadway Drive,HU9 3PB
+HA 32,2,IMPERIAL COURT,HU9 3PG
+HA 32,5,SCHUBERT CLOSE,HU9 3PL
+HA 32,8,SCHUBERT CLOSE,HU9 3PL
+HA 32,19,SCHUBERT CLOSE,HU9 3PL
+HA 32,34,SCHUBERT CLOSE,HU9 3PL
+HA 32,8,FAROES CLOSE,HU9 4AN
+HA 32,9,GOLDEN COURT,HU93PJ
+HA 32,21,GOLDEN COURT,HU93PJ
--- a/etl/eligibility/ha_15_32/HA15
+++ b/etl/eligibility/ha_15_32/HA15
--- a/etl/eligibility/ha_15_32/HA32
+++ b/etl/eligibility/ha_15_32/HA32
--- a/etl/eligibility/ha_15_32/WFT
+++ b/etl/eligibility/ha_15_32/WFT
@ -0,0 +1,665 @@
+import numpy as np
+import pandas as pd
+
+ECO4_NEW_RATES = 1710  # New per-sale ECO4 rate; multiplied by forecasted remaining ECO4 sales to get forecasted revenue (presumably GBP - confirm)
+GBIS_NEW_RATES = 600  # New per-sale GBIS rate; multiplied by forecasted remaining GBIS sales to get forecasted revenue (presumably GBP - confirm)
+
+
+def app():
+    # Load in the excel
+    nov_ha_data = pd.read_excel(
+        'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
+    )
+    # Drop rows where HA name is null
+    nov_ha_data = nov_ha_data.dropna(subset=["HA Name"])
+    nov_ha_data["ha_number"] = nov_ha_data["HA Name"].str.extract(r"(\d+)").astype(int)
+    nov_ha_data = nov_ha_data.sort_values("ha_number", ascending=True)
+
+    variance_explanations = pd.read_excel(
+        'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
+        sheet_name="Variance explanations"
+    )
+
+    september_figures = pd.read_excel(
+        "etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS SEP 23 UPDATE (2).xlsx",
+        sheet_name="HA Stats"
+    )
+
+    historical_invoices = pd.read_excel(
+        "etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx",
+        sheet_name="Jul 22 to Oct 23"
+    )
+    # Drop rows where installer rates is null
+    historical_invoices = historical_invoices[~pd.isnull(historical_invoices["INSTALLER RATES"])]
+    historical_invoices = historical_invoices[historical_invoices["INSTALLER RATES"] != "NA "]
+    # By Scheme, take a weighted mean of the INSTALLER RATES, weighted on the number of rows
+    n_invoices = historical_invoices.groupby(["Scheme", "INSTALLER RATES"])["Invoice number"].count().reset_index()
+    n_invoices = n_invoices[n_invoices["Scheme"].isin(["Eco 4", "GBIS"])]
+    historical_scheme_rates = n_invoices.groupby("Scheme").apply(
+        lambda x: np.average(x["INSTALLER RATES"], weights=x["Invoice number"])
+    ).reset_index().rename(columns={0: "Historical rates"})
+
+    # We keep only the sales data entries that have sales > 0
+    sales_data = nov_ha_data[nov_ha_data["Sales"] > 0]
+
+    # We now need to adjust sales data depending on the variance explanations
+    sales_data = sales_data.merge(
+        variance_explanations[["HA", 'Which figure is correct']],
+        how="left",
+        left_on="ha_number",
+        right_on="HA"
+    )
+
+    def adjust_sales(row):
+        if pd.isnull(row["Which figure is correct"]):
+            return row["Sales"]
+
+        if row["Which figure is correct"] == "HA facts & figures":
+            return row['No. of Tech surveys complete']
+
+        if row["Which figure is correct"] == "Billed amount":
+            return row["Sales"]
+
+        if row["Which figure is correct"] in ["Both correct, HA facts and figures includes November", "Both correct"]:
+            return row["Sales"]
+
+        raise ValueError(f"Unknown value for 'Which figure is correct': {row['Which figure is correct']}")
+
+    # We now need to adjust sales data depending on the variance explanations
+    sales_data["adjusted_sales"] = sales_data.apply(lambda row: adjust_sales(row), axis=1)
+
+    # We therefore adjust GBIS and ECO4 sales data based on adjusted sales
+    sales_data["adjusted_eco4_sales"] = sales_data["No. of Tech surveys complete - Eco 4"] / sales_data["Sales"] * \
+                                        sales_data["adjusted_sales"]
+
+    sales_data["adjusted_gbis_sales"] = sales_data["No. of Tech surveys complete - GBIS"] / sales_data["Sales"] * \
+                                        sales_data["adjusted_sales"]
+
+    sales_data["cancellation_rate"] = (sales_data["Sales"] - sales_data["adjusted_sales"]) / sales_data["Sales"]
+
+    # The difference between the adjusted sales and the actual sales is the cancellation
+    cancellations = (sales_data["adjusted_sales"].sum() - sales_data["Sales"].sum()) / sales_data["Sales"].sum()
+
+    # Given the cancellations, we can now adjust the expected remaining surveys
+    sales_data["No. of Tech surveys remaining"] = sales_data["No. of Tech surveys remaining"] * (
+        1 - sales_data["cancellation_rate"]
+    )
+
+    # We now merge on the expected values for September
+    sales_data = sales_data.merge(
+        september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
+            columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
+        ),
+        how="left",
+        on="HA Name",
+    )
+
+    sales_data["Sept Expected ECO4"] = sales_data["Sept Expected ECO4"].fillna(0)
+    sales_data["Sept Expected GBIS"] = sales_data["Sept Expected GBIS"].fillna(0)
+
+    # We calculate the ECO4 and GBIS conversion rates with the adjusted numbers
+    sales_data["ECO4 Conversion"] = sales_data["adjusted_eco4_sales"] / sales_data["adjusted_sales"]
+    sales_data["GBIS Conversion"] = sales_data["adjusted_gbis_sales"] / sales_data["adjusted_sales"]
+
+    # We now calculate the expected remaining ECO4 and GBIS sales
+    # We take the number of remaining surveys and multiply by the conversion rate for each scheme, which tells us
+    # how many more we should expect to see
+    sales_data["Expected Remaining ECO4"] = sales_data["No. of Tech surveys remaining"] * sales_data["ECO4 Conversion"]
+    sales_data["Expected Remaining GBIS"] = sales_data["No. of Tech surveys remaining"] * sales_data["GBIS Conversion"]
+
+    # We now produce a forecasted ECO4 and GBIS sales figure
+    sales_data["Forecasted ECO4 Sales"] = sales_data["adjusted_eco4_sales"] + sales_data["Expected Remaining ECO4"]
+    sales_data["Forecasted GBIS Sales"] = sales_data["adjusted_gbis_sales"] + sales_data["Expected Remaining GBIS"]
+
+    # Take the columns we're interested in
+    # HA  # Properties	Sept ECO4 Figures	Sept GBIS Figures	Nov Total Sales	Nov ECO4 Sales	Nov GBIS Sales
+    # Remaining Surveys	ECO4 conversion	GBIS conversion	Forecasted ECO4 Sales	Forecasted GBIS sales	ECO4 Change
+    # GBIS Change
+    sales_data_formatted = sales_data[[
+        "HA Name",
+        "ASSET LIST no.",
+        "Sept Expected ECO4",
+        "Sept Expected GBIS",
+        "adjusted_sales",
+        "adjusted_eco4_sales",
+        "adjusted_gbis_sales",
+        "No. of Tech surveys remaining",
+        "ECO4 Conversion",
+        "GBIS Conversion",
+        "Forecasted ECO4 Sales",
+        "Forecasted GBIS Sales"
+    ]].rename(
+        columns={
+            "adjusted_sales": "Oct Total Sales (adjusted for variance)",
+            "adjusted_eco4_sales": "Oct ECO4 Sales (adjusted for variance)",
+            "adjusted_gbis_sales": "Oct GBIS Sales (adjusted for variance)",
+            "No. of Tech surveys remaining": "Remaining Surveys",
+        }
+    )
+
+    # Convert columns which should be integers to integers
+    for col in ["ASSET LIST no.", "Remaining Surveys", "Sept Expected ECO4", "Sept Expected GBIS",
+                "Oct Total Sales (adjusted for variance)", "Oct ECO4 Sales (adjusted for variance)",
+                "Oct GBIS Sales (adjusted for variance)", "Forecasted ECO4 Sales", "Forecasted GBIS Sales"]:
+        sales_data_formatted[col] = sales_data_formatted[col].fillna(0)
+        sales_data_formatted[col] = sales_data_formatted[col].astype(int)
+
+    # Remove HA 17 because this was EPCs only. We also remove HA33 because they do not have access to the full portfolio
+    sales_data_formatted = sales_data_formatted[
+        ~sales_data_formatted["HA Name"].isin(["HA 17", "HA 33"])
+    ]
+
+    # September expected ECO4 and GBIS
+    sept_expected_eco4 = sales_data_formatted["Sept Expected ECO4"].sum()
+    sept_expected_gbis = sales_data_formatted["Sept Expected GBIS"].sum()
+
+    # Completed so far
+    oct_eco4_sales = sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"].sum()
+    oct_gbis_sales = sales_data_formatted["Oct GBIS Sales (adjusted for variance)"].sum()
+
+    # Forecasted figures
+    forecasted_eco4_sales = sales_data_formatted["Forecasted ECO4 Sales"].sum()
+    forecasted_gbis_sales = sales_data_formatted["Forecasted GBIS Sales"].sum()
+
+    # Expected remaining sales
+    expected_remaining_eco4_sales = forecasted_eco4_sales - oct_eco4_sales
+    expected_remaining_gbis_sales = forecasted_gbis_sales - oct_gbis_sales
+
+    # Forecast change vs September
+    forecasted_eco4_change = 100 * (forecasted_eco4_sales - sept_expected_eco4) / sept_expected_eco4
+    forecasted_gbis_change = 100 * (forecasted_gbis_sales - sept_expected_gbis) / sept_expected_gbis
+
+    aggregates = pd.DataFrame(
+        columns=["Scheme", "Sept Expected", "Oct Completed", "Forecasted Remaining Sales", "Forecasted Total Sales",
+                 "Forecasted Change vs Sept"],
+        data=[
+            ["ECO4", sept_expected_eco4, oct_eco4_sales, expected_remaining_eco4_sales, forecasted_eco4_sales,
+             forecasted_eco4_change],
+            ["GBIS", sept_expected_gbis, oct_gbis_sales, expected_remaining_gbis_sales, forecasted_gbis_sales,
+             forecasted_gbis_change],
+        ]
+    )
+
+    # Multiply by historical rates to get revenue
+    # For ECO4, this is ~£1456 and for GBIS it's ~£432
+    historical_gbis_price = historical_scheme_rates[
+        historical_scheme_rates["Scheme"] == "GBIS"
+        ]["Historical rates"].iloc[0]
+
+    historical_eco4_price = historical_scheme_rates[
+        historical_scheme_rates["Scheme"] == "Eco 4"
+        ]["Historical rates"].iloc[0]
+
+    aggregates["Sept Expected Revenue"] = np.where(
+        aggregates["Scheme"] == "ECO4",
+        aggregates["Sept Expected"] * historical_eco4_price,
+        aggregates["Sept Expected"] * historical_gbis_price
+    )
+
+    aggregates["Completed Revenue"] = np.where(
+        aggregates["Scheme"] == "ECO4",
+        aggregates["Oct Completed"] * historical_eco4_price,
+        aggregates["Oct Completed"] * historical_gbis_price
+    )
+
+    # We use the new rates for the forecasted revenue
+    aggregates["Forecasted Remaining Revenue"] = np.where(
+        aggregates["Scheme"] == "ECO4",
+        aggregates["Forecasted Remaining Sales"] * ECO4_NEW_RATES,
+        aggregates["Forecasted Remaining Sales"] * GBIS_NEW_RATES
+    )
+
+    # We also calculate the forecasted remaining revenue at the original price
+    aggregates["Forecasted Remaining Revenue (original price)"] = np.where(
+        aggregates["Scheme"] == "ECO4",
+        aggregates["Forecasted Remaining Sales"] * historical_eco4_price,
+        aggregates["Forecasted Remaining Sales"] * historical_gbis_price
+    )
+
+    aggregates["Forecasted Revenue"] = aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue"]
+
+    # Forecasted revenue with original price
+    aggregates["Forecasted Revenue (original price)"] = (
+        aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue (original price)"]
+    )
+
+    # Create a totals row which sums up the two rows
+
+    forecasted_change_vs_sept = 100 * (
+        aggregates["Forecasted Total Sales"].sum() - aggregates["Sept Expected"].sum()
+    ) / aggregates["Sept Expected"].sum()
+
+    aggregates = pd.concat(
+        [
+            aggregates,
+            pd.DataFrame(
+                [
+                    ["Total", aggregates["Sept Expected"].sum(), aggregates["Oct Completed"].sum(),
+                     aggregates["Forecasted Remaining Sales"].sum(), aggregates["Forecasted Total Sales"].sum(),
+                     forecasted_change_vs_sept,
+                     aggregates["Sept Expected Revenue"].sum(), aggregates["Completed Revenue"].sum(),
+                     aggregates["Forecasted Remaining Revenue"].sum(),
+                     aggregates["Forecasted Remaining Revenue (original price)"].sum(),
+                     aggregates["Forecasted Revenue"].sum(),
+                     aggregates["Forecasted Revenue (original price)"].sum(),
+                     ]
+                ],
+                columns=aggregates.columns
+            )
+        ]
+    )
+
+    # For each property in the asset list, we now calculate an average conversion rate to ECO4 and GBIS
+    # We do this by taking the forecasted sales values for each scheme and dividing by the number of properties
+
+    number_properties = sales_data_formatted["ASSET LIST no."].sum()
+    eco4_conversion_rate = forecasted_eco4_sales / number_properties
+    gbis_conversion_rate = forecasted_gbis_sales / number_properties
+
+    # We also attribute a future value per property
+    future_eco4_value = ECO4_NEW_RATES * eco4_conversion_rate
+    future_gbis_value = GBIS_NEW_RATES * gbis_conversion_rate
+
+    # We also calculate a revenue figure for the old rates
+    historical_eco4_value = historical_eco4_price * eco4_conversion_rate
+    historical_gbis_value = historical_gbis_price * gbis_conversion_rate
+
+    # For the HAs that have not begun selling, we estimate the value of the projects
+    # We start with some problem HAs
+
+    # HA 7, HA 24, HA 25
+    # These HAs have no sales data, so we use the expected figures
+
+    problem_has_data = nov_ha_data[
+        (nov_ha_data["HA Name"].isin(["HA 7", "HA 24", "HA 25"]))
+    ].copy()
+    # Merge on the september expected figures
+    problem_has_data = problem_has_data.merge(
+        september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
+            columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
+        ),
+        how="left",
+        on="HA Name",
+    )
+    # Fill NAs
+    problem_has_data["Sept Expected ECO4"] = problem_has_data["Sept Expected ECO4"].fillna(0)
+    problem_has_data["Sept Expected GBIS"] = problem_has_data["Sept Expected GBIS"].fillna(0)
+
+    # We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
+    problem_has_data["Expected ECO4 Sales"] = problem_has_data["ASSET LIST no."] * eco4_conversion_rate
+    problem_has_data["Expected GBIS Sales"] = problem_has_data["ASSET LIST no."] * gbis_conversion_rate
+
+    # Filter just on columns we're interested in
+    problem_has_data = problem_has_data[[
+        "HA Name",
+        "ASSET LIST no.",
+        "Sept Expected ECO4",
+        "Sept Expected GBIS",
+        "ECO4",
+        "GBIS",
+        "Expected ECO4 Sales",
+        "Expected GBIS Sales"
+    ]].rename(
+        columns={
+            "ECO4": "Nov Expected ECO4",
+            "GBIS": "Nov Expected GBIS",
+        }
+    )
+
+    # Fill NAs
+    problem_has_data["Nov Expected ECO4"] = problem_has_data["Nov Expected ECO4"].fillna(0)
+    problem_has_data["Nov Expected GBIS"] = problem_has_data["Nov Expected GBIS"].fillna(0)
+
+    # We calculate HA level Sept, Nov expected revenue, based on historical rates and then forecasted revenue
+    problem_has_data["Sept Expected ECO4 Value"] = problem_has_data["Sept Expected ECO4"] * historical_eco4_price
+    problem_has_data["Sept Expected GBIS Value"] = problem_has_data["Sept Expected GBIS"] * historical_gbis_price
+
+    problem_has_data["Nov Expected ECO4 Value"] = problem_has_data["Nov Expected ECO4"] * historical_eco4_price
+    problem_has_data["Nov Expected GBIS Value"] = problem_has_data["Nov Expected GBIS"] * historical_gbis_price
+
+    problem_has_data["Forecasted ECO4 Revenue"] = problem_has_data["ASSET LIST no."] * future_eco4_value
+    problem_has_data["Forecasted GBIS Revenue"] = problem_has_data["ASSET LIST no."] * future_gbis_value
+
+    # Totals
+    problem_has_data["Sept Expected Total Value"] = problem_has_data["Sept Expected ECO4 Value"] + \
+                                                    problem_has_data["Sept Expected GBIS Value"]
+    problem_has_data["Nov Expected Total Value"] = problem_has_data["Nov Expected ECO4 Value"] + \
+                                                   problem_has_data["Nov Expected GBIS Value"]
+    problem_has_data["Forecasted Total Revenue"] = problem_has_data["Forecasted ECO4 Revenue"] + \
+                                                   problem_has_data["Forecasted GBIS Revenue"]
+
+    # We calculate a total expected value for September, November and then forecasted
+    problem_has_expected_eco4_value = problem_has_data["Sept Expected ECO4"].sum() * historical_eco4_price
+    problem_has_expected_gbis_value = problem_has_data["Sept Expected GBIS"].sum() * historical_gbis_price
+    problem_has_expected_total_value = problem_has_expected_eco4_value + problem_has_expected_gbis_value
+
+    problem_has_nov_eco4_value = problem_has_data["Nov Expected ECO4"].sum() * historical_eco4_price
+    problem_has_nov_gbis_value = problem_has_data["Nov Expected GBIS"].sum() * historical_gbis_price
+    problem_has_nov_total_value = problem_has_nov_eco4_value + problem_has_nov_gbis_value
+
+    forecasted_eco4_value = problem_has_data["ASSET LIST no."].sum() * future_eco4_value
+    forecasted_gbis_value = problem_has_data["ASSET LIST no."].sum() * future_gbis_value
+    problem_has_forecasted_total_value = forecasted_eco4_value + forecasted_gbis_value
+
+    problem_has_summary = pd.DataFrame(
+        columns=["Scheme", "Sept Expected", "Nov Expected", "Forecasted"],
+        data=[
+            ["ECO4", problem_has_expected_eco4_value, problem_has_nov_eco4_value, forecasted_eco4_value],
+            ["GBIS", problem_has_expected_gbis_value, problem_has_nov_gbis_value, forecasted_gbis_value],
+            ["Total", problem_has_expected_total_value, problem_has_nov_total_value, problem_has_forecasted_total_value]
+        ]
+    )
+
+    # We now also estimate the value of the remaining HAs based on historical sales performance and new rates
+    # We take the HAs that are not in the sales data
+    remaining_has = nov_ha_data[
+        ~nov_ha_data["HA Name"].isin(sales_data_formatted["HA Name"])
+    ].copy()
+
+    # Merge on the september expected figures
+    remaining_has = remaining_has.merge(
+        september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
+            columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
+        ),
+        how="left",
+        on="HA Name",
+    )
+
+    # We update the asset list size for HA 33, because they do not have access to the full portfolio
+    remaining_has.loc[remaining_has["HA Name"] == "HA 33", "ASSET LIST no."] = 20699
+    # We also remove HA 17
+    remaining_has = remaining_has[~remaining_has["HA Name"].isin(["HA 17"])]
+
+    # We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
+    remaining_has["Expected ECO4 Sales"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
+    remaining_has["Expected GBIS Sales"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
+
+    # Filter just on columns we're interested in
+    remaining_has = remaining_has[[
+        "HA Name",
+        "ASSET LIST no.",
+        "Sept Expected ECO4",
+        "Sept Expected GBIS",
+        "ECO4",
+        "GBIS",
+    ]].rename(
+        columns={
+            "ECO4": "Nov Expected ECO4",
+            "GBIS": "Nov Expected GBIS",
+        }
+    )
+
+    remaining_has = remaining_has.fillna(0)
+
+    # We take just HAs that had an initial september expectation for ECO4 or GBIS, or that now have a Nov expectation
+    remaining_has = remaining_has[
+        (remaining_has["Sept Expected ECO4"] > 0) | (remaining_has["Sept Expected GBIS"] > 0) |
+        (remaining_has["Nov Expected ECO4"] > 0) | (remaining_has["Nov Expected GBIS"] > 0)
+        ]
+
+    # Expected sales based on asset list size and conversion rate
+    remaining_has["Forecasted Sales ECO4"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
+    remaining_has["Forecasted Sales GBIS"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
+
+    # Calculate the total expected value for September and November
+    remaining_has["Sept Expected ECO4 Value"] = remaining_has["Sept Expected ECO4"] * historical_eco4_price
+    remaining_has["Sept Expected GBIS Value"] = remaining_has["Sept Expected GBIS"] * historical_gbis_price
+
+    remaining_has["Nov Expected ECO4 Value"] = remaining_has["Nov Expected ECO4"] * historical_eco4_price
+    remaining_has["Nov Expected GBIS Value"] = remaining_has["Nov Expected GBIS"] * historical_gbis_price
+
+    # Calculate forecasted revenue
+    remaining_has["Forecasted ECO4 Revenue"] = remaining_has["ASSET LIST no."] * future_eco4_value
+    remaining_has["Forecasted GBIS Revenue"] = remaining_has["ASSET LIST no."] * future_gbis_value
+
+    # We also calculate forecasted revenue with the original price
+    remaining_has["Forecasted ECO4 Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_eco4_value
+    remaining_has["Forecasted GBIS Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_gbis_value
+
+    # Calculate totals for each scheme
+    remaining_has_september_eco4_sales = remaining_has["Sept Expected ECO4"].sum()
+    remaining_has_september_gbis_sales = remaining_has["Sept Expected GBIS"].sum()
+
+    remaining_has_november_eco4_sales = remaining_has["Nov Expected ECO4"].sum()
+    remaining_has_november_gbis_sales = remaining_has["Nov Expected GBIS"].sum()
+
+    remaining_has_forecasted_eco4_sales = remaining_has["Forecasted Sales ECO4"].sum()
+    remaining_has_forecasted_gbis_sales = remaining_has["Forecasted Sales GBIS"].sum()
+
+    remaining_has_september_eco4_value = remaining_has["Sept Expected ECO4 Value"].sum()
+    remaining_has_september_gbis_value = remaining_has["Sept Expected GBIS Value"].sum()
+
+    remaining_has_november_eco4_value = remaining_has["Nov Expected ECO4 Value"].sum()
+    remaining_has_november_gbis_value = remaining_has["Nov Expected GBIS Value"].sum()
+
+    remaining_has_forecasted_eco4_value = remaining_has["Forecasted ECO4 Revenue"].sum()
+    remaining_has_forecasted_gbis_value = remaining_has["Forecasted GBIS Revenue"].sum()
+
+    remaining_has_forecasted_eco4_value_original_price = remaining_has["Forecasted ECO4 Revenue (original price)"].sum()
+    remaining_has_forecasted_gbis_value_original_price = remaining_has["Forecasted GBIS Revenue (original price)"].sum()
+
+    # Calculate the change in forecasted sales against the September expected sales
+    remaining_has_foecast_change_eco4 = 100 * (
+        remaining_has["Forecasted Sales ECO4"].sum() - remaining_has["Sept Expected ECO4"].sum()
+    ) / remaining_has["Sept Expected ECO4"].sum()
+
+    remaining_has_foecast_change_gbis = 100 * (
+        remaining_has["Forecasted Sales GBIS"].sum() - remaining_has["Sept Expected GBIS"].sum()
+    ) / remaining_has["Sept Expected GBIS"].sum()
+
+    # Total change
+    remaining_has_foecast_change_total = 100 * (
+        remaining_has["Forecasted Sales ECO4"].sum() + remaining_has["Forecasted Sales GBIS"].sum() -
+        remaining_has["Sept Expected ECO4"].sum() - remaining_has["Sept Expected GBIS"].sum()
+    ) / (remaining_has["Sept Expected ECO4"].sum() + remaining_has["Sept Expected GBIS"].sum())
+
+    asset_list_size = remaining_has["ASSET LIST no."].sum()
+
+    # Create a summary table of the rest with the totals for ECO4, GBIS and then a total row
+    remaining_has_aggregate = pd.DataFrame(
+        columns=["Scheme", "Asset List Size", "Sept Expected Sales", "Nov Expected Sales", "Forecasted Sales",
+                 "Forecasted Change vs Sept",
+                 "Sept Expected Value", "Nov Expected Value", "Forecasted Value", "Forecasted Value (original price)"],
+        data=[
+            [
+                "ECO4", asset_list_size, remaining_has_september_eco4_sales, remaining_has_november_eco4_sales,
+                remaining_has_forecasted_eco4_sales, remaining_has_foecast_change_eco4,
+                remaining_has_september_eco4_value,
+                remaining_has_november_eco4_value, remaining_has_forecasted_eco4_value,
+                remaining_has_forecasted_eco4_value_original_price
+            ],
+            [
+                "GBIS", asset_list_size, remaining_has_september_gbis_sales, remaining_has_november_gbis_sales,
+                remaining_has_forecasted_gbis_sales, remaining_has_foecast_change_gbis,
+                remaining_has_september_gbis_value,
+                remaining_has_november_gbis_value, remaining_has_forecasted_gbis_value,
+                remaining_has_forecasted_gbis_value_original_price
+            ],
+            [
+                "Total", asset_list_size, remaining_has_september_eco4_sales + remaining_has_september_gbis_sales,
+                                          remaining_has_november_eco4_sales + remaining_has_november_gbis_sales,
+                                          remaining_has_forecasted_eco4_sales + remaining_has_forecasted_gbis_sales,
+                remaining_has_foecast_change_total,
+                                          remaining_has_september_eco4_value + remaining_has_september_gbis_value,
+                                          remaining_has_november_eco4_value + remaining_has_november_gbis_value,
+                                          remaining_has_forecasted_eco4_value + remaining_has_forecasted_gbis_value,
+                                          remaining_has_forecasted_eco4_value_original_price +
+                                          remaining_has_forecasted_gbis_value_original_price
+            ]
+        ]
+    )
+
+    # Calculate pipeline value
+    pipeline_value = aggregates[["Scheme", "Completed Revenue", "Forecasted Remaining Revenue"]].merge(
+        remaining_has_aggregate[["Scheme", "Forecasted Value"]].rename(
+            columns={"Forecasted Value": "Forecasted Revenue, Unconfirmed HAs"}
+        ), how="inner", on="Scheme"
+    )
+
+    # Calculate the total
+    pipeline_value["Total Value"] = (
+        pipeline_value["Completed Revenue"] + pipeline_value["Forecasted Remaining Revenue"] + pipeline_value[
+        "Forecasted Revenue, Unconfirmed HAs"]
+    )
+
+    # TODO: Insert model figures
+    model_results = pd.DataFrame(
+        [
+            {
+                # This one, we don't have sales data
+                "HA Name": "HA 15",
+                "Model Expected Additional ECO4 (unit level)": None,
+                "Model Expected Total ECO4 (unit level)": 296,
+                "Model Expected Additional GBIS (unit level)": None,
+                "Model Expected Total GBIS (unit level)": 209,
+            },
+            {
+                "HA Name": "HA 16",
+                # Old before re-run
+                # "Model Expected Additional ECO4 (unit level)": 418,
+                # "Model Expected Total ECO4 (unit level)": 1820,
+                # "Model Expected Additional GBIS (unit level)": 576,
+                # "Model Expected Total GBIS (unit level)": 612,
+
+                # IN the partial sales data, WFT have completed 1407 ECO4, 36 GBIS
+                "Model Expected Additional ECO4 (unit level)": 411 + 342 + 235,
+                "Model Expected Total ECO4 (unit level)": 1407 + 411 + 342 + 235,
+                "Model Expected Additional GBIS (unit level)": 223,
+                "Model Expected Total GBIS (unit level)": 36 + 223,
+            },
+            {
+                "HA Name": "HA 24",
+                "Model Expected Additional ECO4 (unit level)": 224,
+                "Model Expected Total ECO4 (unit level)": 848,
+                "Model Expected Additional GBIS (unit level)": 552,
+                "Model Expected Total GBIS (unit level)": 552,
+            },
+            {
+                "HA Name": "HA 25",
+                "Model Expected Additional ECO4 (unit level)": None,
+                "Model Expected Total ECO4 (unit level)": 1709 + 59,
+                "Model Expected Additional GBIS (unit level)": None,
+                "Model Expected Total GBIS (unit level)": 2004 + 107,
+            }
+        ]
+    )
+
+    sales_data_formatted["Remaining ECO4 Sales"] = (
+        sales_data_formatted["Forecasted ECO4 Sales"] - sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"]
+    )
+
+    sales_data_formatted["Remaining GBIS Sales"] = (
+        sales_data_formatted["Forecasted GBIS Sales"] - sales_data_formatted["Oct GBIS Sales (adjusted for variance)"]
+    )
+
+    sales_data_formatted["Completed ECO4 Revenue"] = (sales_data_formatted[
+                                                          "Oct ECO4 Sales (adjusted for variance)"] *
+                                                      historical_eco4_price)
+    sales_data_formatted["Completed GBIS Revenue"] = (sales_data_formatted[
+                                                          "Oct GBIS Sales (adjusted for variance)"] *
+                                                      historical_gbis_price)
+
+    ha_subset_with_sales = ["HA 15", "HA 16", "HA 24"]
+
+    has_subset_with_sales_value = sales_data_formatted[
+        sales_data_formatted["HA Name"].isin(ha_subset_with_sales)
+    ].copy()[
+        [
+            "HA Name",
+            "Oct ECO4 Sales (adjusted for variance)",
+            "Oct GBIS Sales (adjusted for variance)",
+            "Remaining ECO4 Sales",
+            "Remaining GBIS Sales",
+            "Forecasted ECO4 Sales",
+            "Forecasted GBIS Sales",
+            "Completed ECO4 Revenue",
+            "Completed GBIS Revenue"
+        ]
+    ]
+
+    has_subset_with_sales_value["Remaining ECO4 Revenue"] = has_subset_with_sales_value[
+                                                                "Remaining ECO4 Sales"] * ECO4_NEW_RATES
+    has_subset_with_sales_value["Remaining GBIS Revenue"] = has_subset_with_sales_value[
+                                                                "Remaining GBIS Sales"] * GBIS_NEW_RATES
+
+    has_subset_with_sales_value["Remaining Total Revenue"] = (
+        has_subset_with_sales_value["Remaining ECO4 Revenue"] + has_subset_with_sales_value["Remaining GBIS Revenue"]
+    )
+
+    model_results["Model Expected Additional ECO4 Revenue"] = (
+        model_results["Model Expected Additional ECO4 (unit level)"] * ECO4_NEW_RATES
+    )
+
+    model_results["Model Expected Additional GBIS revenue"] = (
+        model_results["Model Expected Additional GBIS (unit level)"] * GBIS_NEW_RATES
+    )
+
+    model_results["Model Expected Additional Total Revenue"] = (
+        model_results["Model Expected Additional ECO4 Revenue"] + model_results[
+        "Model Expected Additional GBIS revenue"]
+    )
+
+    # Show more columns with pandas
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+
+    # Look at HA 16
+    ha16_model = model_results[model_results["HA Name"] == "HA 16"]
+    has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 16"]
+
+    # WFT: For HA 16: 4,598,190 ECO4, 57,000 GBIS
+    # Model:
+
+    # Look at HA 24
+    ha24_model = model_results[model_results["HA Name"] == "HA 24"]
+    has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 24"]
+
+    # Look at HA 15
+    ha15_data = has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 15"]
+    ha15_portfolio_value = ha15_data["Completed ECO4 Revenue"] + ha15_data[
+        "Completed GBIS Revenue"] + ha15_data["Remaining Total Revenue"]
+    # # This doesn't have sales data so in the model analysis, we just value the ha as a whole
+    ha15_model = model_results[model_results["HA Name"] == "HA 15"]
+    ha15_value = ha15_model["Model Expected Total ECO4 (unit level)"].iloc[0] * ECO4_NEW_RATES + \
+                 ha15_model["Model Expected Total GBIS (unit level)"].iloc[0] * GBIS_NEW_RATES
+
+    model_results["Expected ECO4 Revenue"] = model_results["Model Expected Total ECO4 (unit level)"] * ECO4_NEW_RATES
+    model_results["Expected GBIS Revenue"] = model_results["Model Expected Total GBIS (unit level)"] * GBIS_NEW_RATES
+    model_results["Expected Total Revenue"] = model_results["Expected ECO4 Revenue"] + model_results[
+        "Expected GBIS Revenue"]
+    model_results[model_results["HA Name"].isin(["HA 15"])]
+
+    # We now create a final excel with all of the data
+    # We want:
+    # 1) aggregates
+    # 2) sales_data_formatted
+    # 3) remaining_has_aggregate
+    # 4) remaining_has
+    # 5) problem_has_summary
+
+    # Function to get the maximum column width
+    def get_col_widths(dataframe):
+        # First we find the maximum length of the index column
+        idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
+        # Then, we concatenate this to the max of the lengths of column name and its max value for each column, row-wise
+        return [idx_max] + [max(dataframe[col].astype(str).map(len).max(), len(col)) for col in dataframe.columns]
+
+    # Create a Pandas Excel writer using XlsxWriter as the engine
+    with pd.ExcelWriter('HA Pipeline Analysis.xlsx', engine='xlsxwriter') as writer:
+        # Write each dataframe to a different worksheet without the index
+        for df, sheet in [(aggregates, 'Forecasted Sales'),
+                          (sales_data_formatted, 'Sales Data'),
+                          (remaining_has_aggregate, 'Remaining HAs Value'),
+                          (remaining_has, 'Remaining HAs data'),
+                          (pipeline_value, 'Pipeline Value'),
+                          (problem_has_summary, 'Problem HAs Analysis'),
+                          (problem_has_data, 'Problem HAs Data')
+
+                          ]:
+
+            df.to_excel(writer, sheet_name=sheet, index=False)
+
+            # Auto-adjust columns' width
+            for i, width in enumerate(get_col_widths(df)):
+                writer.sheets[sheet].set_column(i, i, width)
--- a/etl/eligibility/ha_15_32/init.py
+++ b/etl/eligibility/ha_15_32/init.py
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
--- a/etl/eligibility/ha_15_32/cancellation.py
+++ b/etl/eligibility/ha_15_32/cancellation.py
@ -0,0 +1,113 @@
+import openpyxl
+import pandas as pd
+import numpy as np
+
+
+def get_excel_survey_list(workbook_path, worksheet_name=None):
+    survey_workbook = openpyxl.load_workbook(workbook_path)
+    if worksheet_name is not None:
+        survey_sheet = survey_workbook[worksheet_name]
+    else:
+        survey_sheet = survey_workbook.active
+
+    survey_rows = []
+    survey_colors = []
+
+    for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        survey_rows.append(row_data)
+        survey_colors.append(row_color)
+
+    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+    survey_list["row_colour"] = survey_colors
+
+    return survey_list
+
+
+def load_data():
+    # Load for HA 16 - ECO 4
+    ha16_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
+
+    # Load for HA 24 - ECO 4
+    ha24_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
+
+    # Load for HA 25 - ECO 3
+    ha25_survey_list = get_excel_survey_list(
+        'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx', worksheet_name="CAVITY"
+    )
+
+    # Remove columns with None column names
+    ha25_survey_list = ha25_survey_list.dropna(axis=1, how='all')
+
+    # Standardised this installation status columns
+    ha16_survey_list["survey_status"] = ha16_survey_list["INSTALLED OR CANCELLED"].copy()
+    ha16_survey_list["survey_status"] = ha16_survey_list["survey_status"].replace(
+        {
+            "NO UPDATE - CHECKED 2.10.23": "no update",
+            "NO UPDATE - CHECKED 18.12.23": "no update",
+            "INSTALLED": "installed",
+            "CANCELLED": "cancelled",
+            "LOFT STILL TO BE INSTALLED": "loft remaining",
+        }
+    )
+
+    ha24_survey_list["survey_status"] = ha24_survey_list["INSTALLED OR CANCELLED"].copy()
+    ha24_survey_list["survey_status"] = ha24_survey_list["survey_status"].replace(
+        {
+            "NO UPDATE - CHECKED 21.11.23": "no update",
+            "NO UPDATE - CHECKED 18.12.23": "no update",
+            "INSTALLED": "installed",
+            "CANCELLED": "cancelled",
+            "LOFT STILL TO BE INSTALLED": "loft remaining",
+            "SEE NOTES >>": "see notes",
+        }
+    )
+
+    # We need to prepare HA25 differently
+    ha25_survey_list["survey_status"] = np.where(
+        ha25_survey_list["row_colour"] == "FF7030A0", "installed",
+        np.where(ha25_survey_list["row_colour"] == "FF92D050", "installed",
+                 np.where(ha25_survey_list["row_colour"] == "FFFF0000", "cancelled",
+                          np.where(ha25_survey_list["row_colour"] == "FFFFFF00", "filler row - drop",
+                                   np.where(ha25_survey_list["row_colour"] == "FF38FD23", "installed", "unknown")
+                                   )
+                          )
+                 )
+    )
+    ha25_survey_list = ha25_survey_list[ha25_survey_list["survey_status"] != "filler row - drop"]
+
+    # We standardise the cancellation reasons - just create a new column
+    ha16_survey_list["cancellation_reason"] = ha16_survey_list["INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"].copy()
+    ha24_survey_list["cancellation_reason"] = ha24_survey_list["INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"].copy()
+    # There's no cancellation reason for HA25
+    ha25_survey_list["cancellation_reason"] = "No reason provided"
+
+    # Combine the dataframes
+    ha16_survey_list["HA"] = "HA 16"
+    ha24_survey_list["HA"] = "HA 24"
+    ha25_survey_list["HA"] = "HA 25"
+
+    cancellation_data = pd.concat(
+        [
+            ha16_survey_list[["HA", "survey_status", "cancellation_reason"]],
+            ha24_survey_list[["HA", "survey_status", "cancellation_reason"]],
+            ha25_survey_list[["HA", "survey_status", "cancellation_reason"]]
+        ]
+    )
+
+    # Take just rows that we have a confirmed status for
+    cancellation_data = cancellation_data[~cancellation_data["survey_status"].isin(["no update", "loft remaining"])]
+
+    return cancellation_data
+
+
+def app():
+    """
+    This application is used to analyse the cancellation data provided by warmfront
+    :return:
+    """
+
+    # This is cancellations of jobs that completed invasive surveys and the installer could not conclude the work
+    sales_cancellation_data = load_data()
--- a/etl/eligibility/ha_15_32/ha16_app.py
+++ b/etl/eligibility/ha_15_32/ha16_app.py
@ -0,0 +1,647 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+import re
+
+ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+logger = setup_logger()
+load_dotenv(ENV_FILE)
+
+
+def load_data():
+    # This asset list is spread across two sheets, which we need to combine
+
+    asset_list_filenames = [
+        "HESTIA - HA 16 ASSET LIST PART 1 OF 2.xlsx",
+        "HESTIA - HA 16 ASSET LIST PART 2 OF 2.xlsx",
+    ]
+
+    # Prepare lists to collect rows data and their colors
+    rows_data = []
+    rows_colors = []
+    colnames = []
+    for asset_list_filename in asset_list_filenames:
+        workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/{asset_list_filename}')
+        sheet = workbook.active
+        sheet_colnames = [cell.value for cell in sheet[1]]
+        colnames.append(sheet_colnames)
+
+        for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+            # row_color = COLOR_INDEX[row_color]
+            rows_data.append(row_data)
+            rows_colors.append(row_color)
+
+    asset_list = pd.DataFrame(rows_data, columns=colnames[0])
+    # Remove None columns
+    asset_list = asset_list.iloc[:, 0:12]
+    asset_list['row_color'] = rows_colors
+
+    asset_list["row_colour_name"] = np.where(
+        asset_list["row_color"] == "FFFF0000", "red",
+        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
+    )
+
+    # Split up the address on commas, which is useful for matching later
+    split_addresses = asset_list['Address'].str.split(',', expand=True)
+    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5']
+
+    asset_list = pd.concat([asset_list, split_addresses], axis=1)
+    # There is no commas separating house number and address 1
+    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
+    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
+    # We could re-concatenate but we only care about HouseNo for the moment
+    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)
+
+    # We now read in the survey list
+    survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
+    survey_sheet = survey_workbook.active
+
+    survey_rows = []
+    survey_colors = []
+
+    for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        survey_rows.append(row_data)
+        survey_colors.append(row_color)
+
+    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+
+    # For the survey list, we don't need the colours, since there is a column called "INSTALLED OR CANCELLED"
+    # which describes the status of the property
+    survey_list["row_colour"] = survey_colors
+    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]
+    # Tidy up the street/block name a bit
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+    survey_list["Street / Block Name"] = np.where(
+        survey_list["Street / Block Name"] == "REEDS RD",
+        "Reeds ROAD",
+        survey_list["Street / Block Name"]
+    )
+    # Replace " rd " with "road"
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True)
+
+    # Replace " , " with ", "
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
+        " , ", ', ',
+    )
+    # Fix "{place} ,{place}" with "{place}, {place}"
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ', regex=True)
+    # Strip whitespace
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
+
+    # Correct errors
+    survey_list["Post Code"] = np.where(
+        survey_list["Post Code"] == "M38 0SA",
+        "M38 9SA",
+        survey_list["Post Code"]
+    )
+
+    survey_list["Post Code"] = np.where(
+        (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
+        "M44 5JF",
+        survey_list["Post Code"]
+    )
+
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eccels", "eccles")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("chatley, road", "chatley road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("vaughen", "Vaughan")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("cresent", "crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("plantation road",
+                                                                                        "plantation avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("how clough drive",
+                                                                                        "howclough drive")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brockhurst lane",
+                                                                                        "brookhurst lane")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("biirch road",
+                                                                                        "birch road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hadson road",
+                                                                                        "hodson road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("harbonne avennue",
+                                                                                        "narbonne avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("cumberland road, cadishead",
+                                                                                        "cumberland avenue, cadishead")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("aston field drive",
+                                                                                        "ashton field drive")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wedgewood road",
+                                                                                        "wedgwood road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hamilton close",
+                                                                                        "hamilton avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("lichens crescent, fitton hill",
+                                                                                        "lichens crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("south croft, fitton hill",
+                                                                                        "south croft")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(", fitton hill", "")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("firtree dr", "fir tree avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hawthorne road",
+                                                                                        "hawthorn crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("rein lee avenue",
+                                                                                        "reins lee avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("westerhill road",
+                                                                                        "wester hill road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("st martins road",
+                                                                                        "saint martins road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("timperley avenue",
+                                                                                        "timperley close")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eastwood road",
+                                                                                        "eastwood avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("new road", "new street")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grassmere road",
+                                                                                        "grasmere road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hulton road",
+                                                                                        "hulton avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("beechfield avenue",
+                                                                                        "beechfield road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("princess avenue",
+                                                                                        "princes avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("edge ford crecent",
+                                                                                        "edge fold crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("conniston avenue",
+                                                                                        "coniston avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("blackthorne crescent",
+                                                                                        "blackthorn crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wellstock road",
+                                                                                        "wellstock lane")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brackley avenue",
+                                                                                        "brackley street")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brook avenue swinton",
+                                                                                        "brook avenue, swinton")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("green avenue swinton",
+                                                                                        "green avenue, swinton")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grasmere avenue wardley",
+                                                                                        "grasmere avenue, wardley")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("mardale avenue wardle",
+                                                                                        "mardale avenue, wardle")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("carleach grove",
+                                                                                        "cartleach Grove")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("arbour grove",
+                                                                                        "arbor Grove")
+
+    # Replacement for clively avenue 66-68
+    survey_list["NO."] = np.where(
+        survey_list["NO."] == "66-68",
+        "66",
+        survey_list["NO."]
+    )
+
+    # asset_list[asset_list["Address"].str.lower().str.contains("clively")]
+
+    # We now need to merge the survey list onto the asset list
+    # Could be easier just to do a search on each row, even though it's much slower
+    matched = []
+    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
+
+        house_number = row["NO."]
+        if isinstance(house_number, str):
+            house_number = house_number.lower()
+
+        # Filter on the first line of the address
+        df = asset_list[asset_list["Address"].str.lower().str.contains(row["Street / Block Name"].lower())].copy()
+        # df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
+        df = df[df["Address"].str.lower().str.contains(str(house_number))]
+        if df.shape[0] != 1:
+            df = df[df["HouseNo"] == str(house_number)]
+            if df.shape[0] != 1:
+                df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
+                if df.shape[0] != 1:
+                    raise ValueError("Investigate")
+
+        matched.append(
+            {
+                "survey_key": row["survey_key"],
+                "matched_address": df["Address"].values[0],
+                "survey_house_no": row["NO."],
+                "survey_street_name": row["Street / Block Name"],
+                "survey_postcode": row["Post Code"],
+                "survey_status": row["INSTALLED OR CANCELLED"]
+            }
+        )
+
+    matched = pd.DataFrame(matched)
+    matched["warmfront_identified"] = True
+
+    # Combine asset list and surveys
+    data = asset_list.merge(
+        matched, how="left", left_on="Address", right_on="matched_address",
+    )
+    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
+
+    return data, survey_list
+
+
+def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    scoring_data = []
+    results = []
+    nodata = []
+
+    property_type_lookup = {
+        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
+        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
+        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
+        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
+        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Detached House': {"property-type": "House", "built-form": "Detached"},
+        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
+        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
+    }
+
+    for index, property_meta in tqdm(data.iterrows(), total=len(data)):
+
+        searcher = SearchEpc(
+            address1=property_meta["HouseNo"],
+            postcode=property_meta["Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            full_address=property_meta["Address"]
+        )
+        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Type"]]["property-type"]
+        searcher.ordnance_survey_client.built_form = property_type_lookup[property_meta["Type"]]["built-form"]
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta)
+            continue
+
+        if searcher.newest_epc.get("estimated"):
+            # We insert the row ID as our proxy for UPRN
+            proxy_uprn = int(property_meta["row_id"].split("_")[1])
+            searcher.newest_epc["uprn"] = proxy_uprn
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+        # We also want to get the penultimate epc
+        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        if not penultimate_epc:
+            penultimate_epc = newest_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+            eligibility.check_gbis_warmfront()
+            eligibility.check_eco4_warmfront()
+            # If this is the case, we need to update the older epcs
+            # We don't update just to make data cleaning easier
+            if penultimate_epc.get("estimated") is None:
+                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+            eligibility.walls["is_cavity_wall"] and
+            eligibility.walls["is_filled_cavity"] and
+            eligibility.loft["suitability"] and
+            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+        # Full checks
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        if eligibility.eco4_warmfront["eligible"]:
+            if eligibility.epc["uprn"] == "":
+                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+
+            scoring_dictionary = prepare_model_data_row(
+                property_id=property_meta["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        results.append(
+            {
+                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
+                "Address": property_meta["Address"],
+                "Postcode": property_meta["Postcode"],
+                "property_type": eligibility.epc["property-type"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "cavity_type": eligibility.cavity["type"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                "loft_thickness": eligibility.roof["insulation_thickness"],
+                "cavity_age": cavity_age,
+                **eligibility.walls,
+                **eligibility.roof,
+            }
+        )
+
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+
+        # If the property is an F or G, it only needs to upgrade to an %
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+    return results_df, scoring_data, nodata
+
+
+def analyse_results(results_df, data, survey_list):
+    """
+    Exploratory breakdown of the eligibility results against the properties warmfront identified.
+
+    NOTE(review): every frame and figure computed here is only bound to a local name and the function has no
+    return statement, so callers receive None - presumably this is meant to be stepped through interactively.
+
+    :param results_df: per-property results; merged onto ``data`` on "row_id"
+    :param data: asset data; the columns "row_id", "survey_key", "warmfront_identified" and
+        "row_colour_name" are used
+    :param survey_list: survey list; its first column is renamed to "funding_scheme" and joined on "survey_key"
+    :return: None
+    """
+    analysis_data = data[["row_id", "survey_key", "warmfront_identified", "row_colour_name"]].merge(
+        results_df, how="left", on="row_id"
+    ).merge(
+        survey_list[["survey_key", survey_list.columns[0]]].rename(columns={survey_list.columns[0]: "funding_scheme"}),
+        how="left", on="survey_key"
+    )
+
+    # Normalise missing thickness values to None before converting the thickness to a number
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    # Properties warmfront identified, split by the funding scheme recorded in the survey list
+    warmfront_sold_eco4 = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
+        ]  # 1407
+
+    warmfront_sold_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
+        ]
+
+    # ECO4 eligible properties warmfront did not identify, split on a numeric roof insulation thickness of 100
+    ideal_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    secondary_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            analysis_data["roof_insulation_thickness_numeric"] > 100)
+        ]
+
+    # underperforming cavities
+    # NOTE(review): the 10 * 365 threshold presumably means cavity_age is in days (i.e. older than ten years) -
+    # confirm against calculate_cavity_age
+    underperforming_cavities = analysis_data[
+        (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
+            analysis_data["cavity_age"] > 10 * 365
+        ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    identified_gbis_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            analysis_data["eco4_eligible"] == False
+        )
+        ]
+
+    eco_eligible = analysis_data[analysis_data["eco4_eligible"] == True]
+    eco_ineligible = analysis_data[analysis_data["eco4_eligible"] == False]
+
+    # The value counts are computed but not stored or printed
+    eco_ineligible["eco4_message"].value_counts()
+
+    # SAP too high:
+    sap_too_high = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"].copy()
+    further_possibilities = sap_too_high[
+        sap_too_high["walls"].isin(
+            [
+                "Cavity wall, as built, insulated",
+                "Cavity wall, as built, no insulation",
+                "Cavity wall, as built, partial insulation",
+                "Cavity wall, no insulation",
+                "Cavity wall, partial insulation"
+            ]
+        )
+    ]
+
+    # NOTE(review): this applies the same "sap too high" filter as sap_too_high above, despite the name -
+    # possibly a different eco4_message was intended; confirm
+    filled_cavities = eco_ineligible[
+        eco_ineligible["eco4_message"] == "sap too high"
+        ]
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
+    # The value counts are computed but not stored or printed
+    warmfront_identified["walls"].value_counts()
+
+    # Everything warmfront sold under GBIS, plus GBIS eligible properties that are not ECO4 eligible
+    all_identified_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
+            ["ECO4 GBIS (ECO+)"])) |
+        (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None]))
+        ]
+
+    empty_cavity_desriptions = [
+        "Cavity wall, as built, no insulation", "Cavity wall, as built, partial insulation",
+        "Cavity wall, no insulation", "Cavity wall, partial insulation"
+    ]
+
+    empty_cavities = analysis_data[analysis_data["walls"].isin(empty_cavity_desriptions)]
+    remaining_empty = empty_cavities[~empty_cavities["warmfront_identified"]]
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
+
+    # Of the ECO jobs, what proportion do we get right
+    warmfront_identified_eco = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])
+    ]
+
+    # NOTE(review): the denominator is the number of matching rows, which is zero when no ECO jobs were
+    # identified - the same applies to the GBIS rate below
+    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]
+
+    warmfront_identified_gbis = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])
+    ]
+
+    gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]
+
+    # Additional identified
+    additional_identified_eco = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False)
+        ]
+
+    # The value counts are computed but not stored or printed
+    additional_identified_eco["eligibility_classification"].value_counts()
+
+    # From here on only the row counts are kept rather than the frames themselves
+    additional_identified_gbis = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) & (
+            analysis_data["warmfront_identified"] == False
+        )
+        ].shape[0]
+    # Future
+    additional_identified_eco_future = analysis_data[
+        (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False)
+        ].shape[0]
+    additional_identified_gbis_future = analysis_data[
+        (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & (
+            analysis_data["warmfront_identified"] == False
+        )
+        ].shape[0]
+
+
+def app():
+    data, survey_list = load_data()
+
+    data["row_id"] = ["ha16_" + str(i) for i in range(0, len(data))]
+
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    created_at = datetime.now().isoformat()
+
+    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+    results_df, scoring_data, nodata = get_epc_data(
+        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
+    )
+
+    # Store
+    # Old file was ha16.pickle
+    # import pickle
+    # with open("ha16_10_jan.pickle", "wb") as f:
+    #     pickle.dump(
+    #         {
+    #             "scoring_data": scoring_data,
+    #             "results": results_df,
+    #             "nodata": nodata
+    #         }, f
+    #     )
+
+    # Read pickle
+    # import pickle
+    # with open("ha16_10_jan.pickle", "rb") as f:
+    #     saved = pickle.load(f)
+    # scoring_data = saved["scoring_data"]
+    # results_df = saved["results"]
+    # nodata = saved["nodata"]
--- a/etl/eligibility/ha_15_32/ha24_app.py
+++ b/etl/eligibility/ha_15_32/ha24_app.py
@ -0,0 +1,524 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+logger = setup_logger()
+
+# This module already lives in etl/eligibility/ha_15_32, so the .env file sits next to it. Re-appending
+# "etl/eligibility/ha_15_32" to the module's own directory pointed at a path that does not exist.
+ENV_FILE = Path(__file__).parent / ".env"
+
+# The .env file must be loaded BEFORE the token is read, otherwise a token that is only defined in the file is
+# never picked up. load_dotenv does not override variables already set in the process environment, so a token
+# exported in the shell still takes precedence.
+load_dotenv(ENV_FILE)
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def load_data():
+    """
+    Load the HA 24 asset list and ECO4 survey list from Excel and link every survey row to an asset row.
+
+    The asset list is read together with the fill colour of the first cell of each row, which is translated
+    into ``row_colour_name`` / ``row_colour_code``. The survey list is read the same way, its street names are
+    normalised with a set of hand-written replacements, and each survey row is then matched to exactly one
+    asset row by progressively filtering on street name, house number and postcode.
+
+    :return: tuple ``(data, survey_list)`` where ``data`` is the asset list left-merged with the matched survey
+        rows (including a boolean ``warmfront_identified`` column) and ``survey_list`` is the cleaned survey
+        DataFrame.
+    :raises ValueError: when a survey row cannot be narrowed down to exactly one asset row.
+    """
+    workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 24 ASSET LIST.xlsx')
+    sheet = workbook.active
+    sheet_colnames = [cell.value for cell in sheet[1]]
+
+    rows_data = []
+    rows_colors = []
+    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        # The fill colour of the first cell is taken as the colour of the whole row; '00000000' means no fill
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        rows_data.append(row_data)
+        rows_colors.append(row_color)
+
+    asset_list = pd.DataFrame(rows_data, columns=sheet_colnames)
+    # Remove None columns (only the first ten columns are kept)
+    asset_list = asset_list.iloc[:, 0:10]
+    asset_list['row_color'] = rows_colors
+
+    # Any colour that is neither the red nor the green code below is labelled "yellow", including no fill
+    asset_list["row_colour_name"] = np.where(
+        asset_list["row_color"] == "FFFF0000", "red",
+        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
+    )
+
+    asset_list["row_colour_code"] = np.where(
+        asset_list["row_colour_name"] == "red", "does not meet criteria",
+        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
+    )
+
+    # The third column is listed as "Address" but it's actually the postcode. We have two Address columns so we
+    # change just the third (by position, editing the underlying column array in place)
+    asset_list.columns.values[2] = "Postcode"
+
+    # Split up the address on commas, which is useful for matching later
+    # NOTE(review): assigning six fixed names assumes the split always yields exactly six columns
+    split_addresses = asset_list['Address'].str.split(',', expand=True)
+    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5', 'address6']
+
+    asset_list = pd.concat([asset_list, split_addresses], axis=1)
+    # There are no commas separating house number and address 1
+    # NOTE(review): assigning five fixed names assumes the split always yields exactly five columns
+    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
+    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
+    # We could re-concatenate but we only care about HouseNo for the moment
+    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)
+
+    # Read in surveys
+    survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
+    survey_sheet = survey_workbook.active
+
+    survey_rows = []
+    survey_colors = []
+
+    for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        survey_rows.append(row_data)
+        survey_colors.append(row_color)
+
+    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+
+    survey_list["row_colour"] = survey_colors
+    # Provisional keys; they are regenerated further down once the empty rows have been dropped
+    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]
+    # Tidy up the street/block name a bit
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.strip()
+
+    # Hand-written corrections so the survey street names line up with the spelling used in the asset list.
+    # The order of these replacements matters: later patterns operate on the output of earlier ones.
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "council house, nidds lane", "nidds lane"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "wirral avenue", "wirrall avenue"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "st ives road", "st. ives crescent"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "sundringham road", "sandringham road"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "milton avenue", "milton road"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "st ives crescent", "st. ives crescent"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "council house, waterbelly lane", "waterbelly lane"
+    )
+    # Generally remove "council house, " from the start of the street name
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "council house, ", ""
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "st. leodegars close", "st leodegars close"
+    )
+
+    # asset_list[asset_list["Address"].str.lower().str.contains("wirral")]["Address"]
+
+    # Drop all None rows
+    survey_list = survey_list[~pd.isnull(survey_list["Street / Block Name"])]
+    # Re-key after the drop so that the keys are contiguous over the remaining rows
+    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]
+
+    matched = []
+    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
+        house_number = row["NO."]
+        if isinstance(house_number, str):
+            house_number = house_number.lower()
+
+        # Filter on the first line of the address
+        # NOTE(review): str.contains interprets the pattern as a regular expression by default, so characters
+        # such as "." in a street name act as wildcards
+        df = asset_list[asset_list["Address"].str.lower().str.contains(row["Street / Block Name"].lower())].copy()
+        # df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
+        # Substring match on the house number, so e.g. "1" also matches "11"; the stricter filters below
+        # resolve those cases
+        df = df[df["Address"].str.lower().str.contains(str(house_number))]
+        if df.shape[0] != 1:
+            # Still ambiguous (or empty): require an exact match on the parsed house number
+            df = df[df["HouseNo"] == str(house_number)]
+            if df.shape[0] != 1:
+                # Last resort: narrow down by postcode
+                df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
+                if df.shape[0] != 1:
+                    # No unique match could be found - print the offending survey row and stop for manual review
+                    print(row["Street / Block Name"])
+                    print(house_number)
+                    print(row["Post Code"].lower())
+                    raise ValueError("Investigate")
+
+        # df holds exactly one asset row at this point
+        matched.append(
+            {
+                "survey_key": row["survey_key"],
+                "matched_address": df["Address"].values[0],
+                "survey_house_no": row["NO."],
+                "survey_street_name": row["Street / Block Name"],
+                "survey_postcode": row["Post Code"],
+                "survey_status": row["INSTALLED OR CANCELLED"]
+            }
+        )
+
+    matched = pd.DataFrame(matched)
+    matched["warmfront_identified"] = True
+
+    # Combine asset list and surveys
+    data = asset_list.merge(
+        matched, how="left", left_on="Address", right_on="matched_address",
+    )
+    # Assets without a matching survey row come out of the left merge as NaN; treat those as not identified
+    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
+
+    return data, survey_list
+
+
+def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    """
+    Look up EPCs for every asset, run the eligibility checks and score the ECO4 eligible properties.
+
+    For each row of ``data`` the EPCs are searched with ``SearchEpc``. Properties without an EPC are collected
+    in ``nodata``. For the others the warmfront GBIS/ECO4 checks are run on the newest EPC, falling back to the
+    penultimate EPC when the newest one fails both. Properties that pass the ECO4 warmfront check are prepared
+    for modelling, cleaned with ``DataProcessor`` and scored through ``ModelApi.predict_all``. The summed
+    predicted SAP uplift is then used to attach a confidence classification to each ECO4 eligible property.
+
+    :param data: asset DataFrame; the columns ``HouseNo``, ``Postcode``, ``Address``, ``Property Type`` and
+        ``row_id`` are read here.
+    :param cleaned: cleaned EPC lookup, passed through to ``Eligibility``, ``calculate_cavity_age`` and
+        ``prepare_model_data_row``.
+    :param cleaning_data: cleaning dataset, passed to ``prepare_model_data_row`` and ``DataProcessor``.
+    :param created_at: timestamp string used for the model rows and the ``ModelApi`` instance.
+    :param photo_supply_lookup: solar lookup passed through to ``prepare_model_data_row``.
+    :param floor_area_decile_thresholds: solar thresholds passed through to ``prepare_model_data_row``.
+    :return: tuple ``(results_df, scoring_data, nodata)``. ``scoring_data`` is the raw list of model rows (not
+        the cleaned ``scoring_df``) and ``nodata`` is the list of asset rows for which no EPC was found.
+    :raises KeyError: when a ``Property Type`` value is not present in ``property_type_lookup``.
+    """
+    scoring_data = []
+    results = []
+    nodata = []
+
+    # Maps the property type codes used in the asset list to the property type names handed to the
+    # Ordnance Survey client. Note that " 01 HOUSE MID" (with a leading space) is a distinct key.
+    property_type_lookup = {
+        "01 HOUSE": "House",
+        "02 FLAT": "Flat",
+        "03 BUNGALOW": "Bungalow",
+        "05 BEDSIT": "Flat",
+        "04 MAISONETTE": "Maisonette",
+        "01 HOUSE MID": "House",
+        "10 PBUNGALOW": "Bungalow",
+        "14 SFLAT": "Flat",
+        "12 SBEDSIT": "Flat",
+        "11 PFLAT": "Flat",
+        "13 SBUNGALOW": "Bungalow",
+        " 01 HOUSE MID": "House",
+        "09 PBEDSIT": "Flat"
+    }
+
+    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+
+        searcher = SearchEpc(
+            address1=property_meta["HouseNo"],
+            postcode=property_meta["Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            full_address=property_meta["Address"]
+        )
+        # The property type is set by hand from the asset list and the search is run with skip_os=True
+        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Property Type"]]
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            # No EPC found for this asset - keep the row for reporting and move on
+            nodata.append(property_meta)
+            continue
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+        # We also want to get the penultimate epc
+        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        if not penultimate_epc:
+            penultimate_epc = newest_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        # If the newest EPC fails both warmfront checks, re-run them on the penultimate EPC instead
+        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+            eligibility.check_gbis_warmfront()
+            eligibility.check_eco4_warmfront()
+            # If this is the case, we need to update the older epcs
+            # older_epcs = [
+            #     x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]]
+            # ]
+            # If this is the case, we need to update the older epcs
+            # We don't update just to make data cleaning easier
+            if penultimate_epc.get("estimated") is None:
+                # Every EPC returned by the search except the one now used for the eligibility checks
+                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+            eligibility.walls["is_cavity_wall"] and
+            eligibility.walls["is_filled_cavity"] and
+            eligibility.loft["suitability"] and
+            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+        # Full checks
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        if eligibility.eco4_warmfront["eligible"]:
+            # Only ECO4 (warmfront) eligible properties are prepared for modelling
+            if eligibility.epc["uprn"] in ["", None]:
+                # No UPRN on the EPC: fall back to the numeric suffix of the row id
+                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+
+            scoring_dictionary = prepare_model_data_row(
+                property_id=property_meta["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        # The "gbis_eligible"/"eco4_eligible" fields come from the warmfront checks, the "*_future" fields from
+        # the full checks
+        results.append(
+            {
+                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
+                "Address": property_meta["Address"],
+                "Postcode": property_meta["Postcode"],
+                "property_type": eligibility.epc["property-type"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "cavity_type": eligibility.cavity["type"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                "cavity_age": cavity_age,
+                # Spread last: keys of these dicts overwrite any identically named key set above
+                **eligibility.walls,
+                **eligibility.roof,
+            }
+        )
+
+    # NOTE(review): scoring_data is empty when no property passes the ECO4 warmfront check; the cleaning and
+    # prediction steps below are not guarded against that case
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    # LOCAL_AUTHORITY is only needed as a merge key for the cleaning and is dropped afterwards
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+    model_api = ModelApi(portfolio_id="ha24-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    # Only the SAP change predictions are used below
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    # Uplift of each prediction row over the current SAP, summed over all prediction rows of a property
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+
+        # If the property is an F or G (SAP <= 38), it only needs to reach the lower target; the thresholds
+        # below are centred on a SAP of 55 (presumably band D - confirm against the scheme rules)
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+            # Every other property is measured against the higher target; the thresholds are centred on a SAP
+            # of 69 (presumably band C - confirm against the scheme rules)
+
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    # NOTE(review): if the list is empty the resulting DataFrame has no "row_id" column and the merge below
+    # would raise a KeyError
+    eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+    return results_df, scoring_data, nodata
+
+
+def analyse_results(results_df, data, survey_list):
+    """
+    Ad hoc comparison of the modelled eligibility against the properties identified in the survey list.
+
+    The asset rows, the eligibility results and the funding scheme of the matched survey row are joined into
+    one ``analysis_data`` frame, from which a number of subsets and counts are derived. All of them are local
+    variables: nothing is returned or stored, so the function is only useful when stepped through
+    interactively.
+
+    :param results_df: eligibility results with one row per ``row_id``.
+    :param data: asset DataFrame; ``row_id``, ``survey_key`` and ``warmfront_identified`` are used.
+    :param survey_list: survey DataFrame; ``survey_key`` and its first column are used.
+    :return: None
+    """
+    # The first column of the survey list is taken to be the funding scheme
+    analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge(
+        results_df, how="left", on="row_id"
+    ).merge(
+        survey_list[["survey_key", survey_list.columns[0]]].rename(columns={survey_list.columns[0]: "funding_scheme"}),
+        how="left", on="survey_key"
+    )
+
+    # NEW
+
+    # Normalise missing thickness values to None before converting them to numbers
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    warmfront_sold_eco4 = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
+        ]
+
+    warmfront_sold_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
+        ]
+    # 1407
+
+    # ECO4 eligible properties that are not in the survey list and have at most 100 of roof insulation
+    additional_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    # GBIS eligible properties that are not in the survey list and not already counted in the ECO4 set above
+    additional_gbis_warmfront_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            ~analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values)
+        )
+        ]
+
+    # The results of these two calls are discarded; they are only meaningful in an interactive session
+    additional_gbis_warmfront_not_sold["walls"].value_counts()
+    analysis_data["walls"].value_counts()
+
+    # END NEW
+
+    # NOTE(review): only "ECO4 A/W" is accepted here, whereas the other ECO filters in this function also
+    # accept "AFFORDABLE WARMTH" - confirm which is intended
+    all_identified_eco = analysis_data[
+        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
+            ["ECO4 A/W"])) |
+        (analysis_data["eco4_eligible"])
+        ]
+
+    all_identified_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
+            ["ECO4 GBIS (ECO+)"])) |
+        (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None]))
+        ]
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
+
+    # Of the ECO jobs, what proportion do we get right
+    warmfront_identified_eco = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])
+    ]
+
+    # Share of the surveyed ECO jobs that the model also flags as ECO4 eligible
+    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]
+
+    warmfront_identified_gbis = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])
+    ]
+
+    # No gbis for this
+    # gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]
+
+    # Additional identified
+    additional_identified_eco = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False)
+        ]
+
+    # Result discarded; only meaningful in an interactive session
+    additional_identified_eco["eligibility_classification"].value_counts()
+
+    # From here on the variables hold row counts rather than DataFrames
+    additional_identified_gbis = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) & (
+            analysis_data["warmfront_identified"] == False
+        )
+        ].shape[0]
+    # Future
+    additional_identified_eco_future = analysis_data[
+        (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False)
+        ].shape[0]
+    additional_identified_gbis_future = analysis_data[
+        (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & (
+            analysis_data["warmfront_identified"] == False
+        )
+        ].shape[0]
+
+
+def app():
+    """
+    Run the HA24 eligibility workflow from raw inputs through to scored results.
+
+    The steps are: load the asset/survey data, tag every asset row with a ``ha24_<n>`` row id, pull the cleaned
+    EPC lookup (msgpack) and the cleaning dataset (parquet) from S3, load the solar lookups and finally hand
+    everything to ``get_epc_data``.
+
+    Nothing is returned; the outputs only exist as local variables. The commented snippets at the bottom show how
+    they were pickled to disk and read back during ad hoc runs.
+    """
+    data_bucket = "retrofit-data-dev"
+
+    data, survey_list = load_data()
+
+    # Positional identifier for each asset row, used as the property id downstream
+    data["row_id"] = [f"ha24_{position}" for position in range(len(data))]
+
+    # The cleaned EPC lookup is stored as a msgpack payload
+    cleaned_payload = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=data_bucket)
+    cleaned = msgpack.unpackb(cleaned_payload, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name=data_bucket,
+        file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    # Single timestamp shared by every row produced in this run
+    created_at = datetime.now().isoformat()
+
+    solar_inputs = SolarPhotoSupply.load(bucket=data_bucket)
+    photo_supply_lookup, floor_area_decile_thresholds = solar_inputs
+
+    results_df, scoring_data, nodata = get_epc_data(
+        data,
+        cleaned,
+        cleaning_data,
+        created_at,
+        photo_supply_lookup,
+        floor_area_decile_thresholds,
+    )
+
+    # Persist the outputs just in case (ad hoc use only)
+    # import pickle
+    # with open("ha24_10_jan.pickle", "wb") as f:
+    #     pickle.dump(
+    #         {
+    #             "scoring_data": scoring_data,
+    #             "results": results_df,
+    #             "nodata": nodata
+    #         }, f
+    #     )
+
+    # Restore previously persisted outputs (ad hoc use only)
+    # import pickle
+    # with open("ha24_10_jan.pickle", "rb") as f:
+    #     saved = pickle.load(f)
+    # scoring_data = saved["scoring_data"]
+    # results_df = saved["results"]
+    # nodata = saved["nodata"]
--- a/etl/eligibility/ha_15_32/ha25_app.py
+++ b/etl/eligibility/ha_15_32/ha25_app.py
@ -0,0 +1,883 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+import re
+
+logger = setup_logger()
+
+# This module already lives in etl/eligibility/ha_15_32, so the .env file sits next to it. Re-appending
+# "etl/eligibility/ha_15_32" to the module's own directory pointed at a path that does not exist.
+ENV_FILE = Path(__file__).parent / ".env"
+
+# The .env file must be loaded BEFORE the token is read, otherwise a token that is only defined in the file is
+# never picked up. load_dotenv does not override variables already set in the process environment, so a token
+# exported in the shell still takes precedence.
+load_dotenv(ENV_FILE)
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def load_data():
+    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx', data_only=True)
+    sheet = workbook.active
+
+    rows_data = []
+    rows_colors = []
+    for row in sheet.iter_rows(min_row=1, values_only=True):  # use values_only=True to get values
+
+        row_data = list(row)  # No need for comprehension, values_only=True returns a tuple of values
+        rows_data.append(row_data)
+
+    # Headers are on the final row. Pop them off and store them and then remove them from rows_data
+    headers = rows_data.pop()
+    # The postcode header is None, so we replace it with "postcode"
+    headers[-1] = "postcode"
+
+    # Handle colours separately
+    for row in sheet.iter_rows(min_row=1, values_only=False):
+        # Assume first cell color is indicative of entire row
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        rows_colors.append(row_color)
+
+    # Remove the final row of colours, which is the header
+    rows_colors.pop()
+
+    asset_list = pd.DataFrame(rows_data, columns=headers)
+    asset_list['row_color'] = rows_colors
+
+    asset_list["row_colour_name"] = np.where(
+        asset_list["row_color"] == "FFFF0000", "red",
+        np.where(asset_list["row_color"] == "FF00B050", "green", "yellow")
+    )
+
+    asset_list["row_colour_code"] = np.where(
+        asset_list["row_colour_name"] == "red", "does not meet criteria",
+        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
+    )
+
+    asset_list["address"] = asset_list["T1_Address"].copy().str.lower()
+    asset_list["address"] = asset_list["address"].str.replace("flat", "")
+    asset_list["address"] = asset_list["address"].str.strip()
+
+    split_addresses = asset_list['address'].str.split(' ', expand=True)
+    split_addresses.columns = ['HouseNo', 'address2', 'address3', 'address4', 'address5', 'address6', 'address7',
+                               'address8',
+                               'address9', 'address10', 'address11', 'address12', 'address13', 'address14', ]
+    split_addresses["HouseNo"] = split_addresses["HouseNo"].str.replace(";", "")
+
+    # We could re-concatenate but we only care about HouseNo for the moment
+    asset_list = pd.concat([asset_list, split_addresses[["HouseNo"]]], axis=1)
+    asset_list["postcode"] = asset_list["postcode"].str.strip()
+
+    # We analysis historical ECO3 survey list
+    eco3_survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx')
+    eco3_survey_sheet = eco3_survey_workbook["CAVITY"]
+
+    eco3_survey_rows = []
+    eco3_survey_colors = []
+
+    for row in eco3_survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        eco3_survey_rows.append(row_data)
+        eco3_survey_colors.append(row_color)
+
+    # Some adhoc analysis on the eco3 survey list, just to get completion and cancellation rates historically
+    eco3_survey_list = pd.DataFrame(eco3_survey_rows, columns=[cell.value for cell in eco3_survey_sheet[1]])
+    eco3_survey_list["row_colour"] = eco3_survey_colors
+    # Remove rows where street name is missing
+    eco3_survey_list = eco3_survey_list[~pd.isnull(eco3_survey_list["Street / Block Name"])]
+    # We need to parse the row colours
+    # We have the following mappings:
+    # FF7030A0: purple
+    # FF92D050: green
+    # FFFF0000: red
+    # FFFFFF00: yellow
+    # FF38FD23: green
+    eco3_survey_list["row_colour_name"] = np.where(
+        eco3_survey_list["row_colour"] == "FF7030A0", "purple",
+        np.where(eco3_survey_list["row_colour"] == "FF92D050", "green",
+                 np.where(eco3_survey_list["row_colour"] == "FFFF0000", "red",
+                          np.where(eco3_survey_list["row_colour"] == "FFFFFF00", "yellow",
+                                   np.where(eco3_survey_list["row_colour"] == "FF38FD23", "green", "unknown")
+                                   )
+                          )
+                 )
+    )
+
+    # We map the meaning:
+    # red: cancelled
+    # green: installed advised install complete
+    # purple: installer advised install complete + post works EPC
+    # yellow: filler row - drop
+    eco3_survey_list["row_colour_code"] = np.where(
+        eco3_survey_list["row_colour_name"] == "red", "cancelled",
+        np.where(eco3_survey_list["row_colour_name"] == "green", "installed advised install complete",
+                 np.where(eco3_survey_list["row_colour_name"] == "purple",
+                          "installer advised install complete + post works EPC",
+                          np.where(eco3_survey_list["row_colour_name"] == "yellow", "filler row - drop", "unknown")
+                          )
+                 )
+    )
+
+    # This is good enough for the indicative cancellation rates
+
+    # We now read in the indicative survey list which identified pospects for ECO4 works
+    eco4_survey_workbook = openpyxl.load_workbook(
+        f'etl/eligibility/ha_15_32/HESTIA - HA 25 ADHOC ISOLATED IDENTIFIED PROPERTIES FOR CWI.xlsx'
+    )
+    eco4_prospect_survey_sheet = eco4_survey_workbook["LiveWest"]
+
+    eco4_prospects_survey_rows = []
+    eco4_prospects_survey_colors = []
+
+    for row in eco4_prospect_survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        eco4_prospects_survey_rows.append(row_data)
+        eco4_prospects_survey_colors.append(row_color)
+
+    # Some adhoc analysis on the eco3 survey list, just to get completion and cancellation rates historically
+    eco4_prospects_survey_list = pd.DataFrame(
+        eco4_prospects_survey_rows, columns=[cell.value for cell in eco4_prospect_survey_sheet[1]]
+    )
+    eco4_prospects_survey_list["row_colour"] = eco4_prospects_survey_colors
+
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.lower()
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.strip()
+
+    eco4_prospects_survey_list = eco4_prospects_survey_list[~pd.isnull(eco4_prospects_survey_list["ADDRESS 1"])]
+    eco4_prospects_survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(eco4_prospects_survey_list))]
+
+    # Correct some errors in the survey list
+    eco4_prospects_survey_list["POSTCODE"] = np.where(
+        (eco4_prospects_survey_list["ADDRESS 1"] == "berry park") &
+        (eco4_prospects_survey_list["POSTCODE"] == "PL12 6HP"),
+        "PL12 6EN",
+        eco4_prospects_survey_list["POSTCODE"]
+    )
+
+    # Remove semi colons from address in asset and survey list
+    asset_list["T1_Address"] = asset_list["T1_Address"].str.replace(";", "")
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(";", "")
+
+    # In the prosepcts survey list, we have 6 WALKHAM MEADOWS listed twice, which should be 6a and 6b
+    eco4_prospects_survey_list.loc[838, "NO"] = "6a"
+    eco4_prospects_survey_list.loc[839, "NO"] = "6b"
+
+    # 3, 7, 9 BOLDVENTURE ROAD should be BOLDVENTURE CLOSE
+    eco4_prospects_survey_list["ADDRESS 1"] = np.where(
+        (eco4_prospects_survey_list["ADDRESS 1"] == "boldventure road") &
+        (eco4_prospects_survey_list["NO"].isin([3, 7, 9])),
+        "boldventure close",
+        eco4_prospects_survey_list["ADDRESS 1"]
+    )
+
+    eco4_prospects_survey_list["ADDRESS 1"] = np.where(
+        (eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") & (
+            eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"),
+        "old school road",
+        eco4_prospects_survey_list["ADDRESS 1"]
+    )
+
+    eco4_prospects_survey_list["ADDRESS 1"] = np.where(
+        (eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") & (
+            eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") & (
+            eco4_prospects_survey_list["NO"] == 52),
+        "drum way",
+        eco4_prospects_survey_list["ADDRESS 1"]
+    )
+
+    # String replace
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
+        "the gulls, collaton road", "the gulls collaton road"
+    )
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
+        "crows-an-eglose", "crows-an-eglos"
+    )
+
+    # We have a high volume of rows that do not match
+    matched = []
+    nomatch = []
+    for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)):
+
+        # Not in the asset list
+        if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN":
+            nomatch.append(row.to_dict())
+            continue
+
+        # Not in the asset list
+        if (row["ADDRESS 1"] == "roberts road") and row["NO"] == 23 and row["POSTCODE"] == "PL5 1DP":
+            nomatch.append(row.to_dict())
+            continue
+
+        # Not in the asset list
+        if row["ADDRESS 1"] in [
+            "kaynton mead", "broadmoor lane", "hoopers barton", "ecos court", "selwood road",
+            "castle street"
+        ]:
+            nomatch.append(row.to_dict())
+            continue
+
+        house_number = row["NO"]
+        if isinstance(house_number, str):
+            house_number = house_number.lower()
+
+            if "flat" in house_number:
+                house_number = house_number.split("flat")[1].strip()
+
+        # Filter on the first line of the address
+        df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy()
+        if house_number is not None:
+            if df.shape[0] != 1:
+                df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
+        if df.shape[0] != 1:
+            if house_number is not None:
+                df = df[df["HouseNo"] == str(house_number)]
+            if df.shape[0] != 1:
+                if row["POSTCODE"] is not None:
+                    df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())]
+                if df.shape[0] != 1:
+                    nomatch.append(row.to_dict())
+                    continue
+
+        matched.append(
+            {
+                "survey_key": row["survey_key"],
+                "matched_address": df["T1_Address"].values[0],
+                "survey_house_no": row["NO"],
+                "survey_street_name": row["ADDRESS 1"],
+                "survey_postcode": row["POSTCODE"],
+            }
+        )
+
+    nomatch = pd.DataFrame(nomatch)
+    matched = pd.DataFrame(matched)
+
+    matched["warmfront_identified"] = True
+
+    # Combine asset list and surveys
+    data = asset_list.merge(
+        matched, how="left", left_on="T1_Address", right_on="matched_address",
+    )
+    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
+
+    lost_identified_properties = eco4_prospects_survey_list[
+        ~eco4_prospects_survey_list["survey_key"].isin(matched["survey_key"])
+    ]
+
+    return data, eco4_prospects_survey_list, lost_identified_properties
+
+
+def map_year_to_age_band(year):
+    """
+    Map a build year onto the "England and Wales: ..." construction age band label.
+
+    :param year: anything int() accepts (int, float, numeric string)
+    :return: the age band label, or "Invalid Year" when the value cannot be converted to an integer
+    """
+    try:
+        year = int(year)
+    except (TypeError, ValueError, OverflowError):
+        # ValueError: non-numeric strings and NaN; TypeError: None and other non-numeric objects;
+        # OverflowError: infinite floats
+        return "Invalid Year"
+
+    if year < 1900:
+        return "England and Wales: before 1900"
+
+    # Upper bound (inclusive) of each band, in ascending order; the first bound that fits wins
+    bands = (
+        (1929, "England and Wales: 1900-1929"),
+        (1949, "England and Wales: 1930-1949"),
+        (1966, "England and Wales: 1950-1966"),
+        (1975, "England and Wales: 1967-1975"),
+        (1982, "England and Wales: 1976-1982"),
+        (1990, "England and Wales: 1983-1990"),
+        (1995, "England and Wales: 1991-1995"),
+        (2002, "England and Wales: 1996-2002"),
+        (2006, "England and Wales: 2003-2006"),
+        (2011, "England and Wales: 2007-2011"),
+    )
+    for upper_bound, label in bands:
+        if year <= upper_bound:
+            return label
+
+    # All remaining years are 2012 onwards
+    return "England and Wales: 2012 onwards"
+
+
+def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    """
+    Look up the EPC for every asset, assess scheme eligibility and score the ECO4-eligible properties.
+
+    :param data: asset list, either a DataFrame or an iterable of (index, row) pairs. Each row is read for
+        "HouseNo", "postcode", "address", "T1_Address", "T1_AssetType", "Build Yr" and "row_id"
+    :param cleaned: passed through to Eligibility, calculate_cavity_age and prepare_model_data_row
+    :param cleaning_data: passed through to prepare_model_data_row and DataProcessor.apply_averages_cleaning
+    :param created_at: timestamp passed to prepare_model_data_row and ModelApi
+    :param photo_supply_lookup: passed through to prepare_model_data_row
+    :param floor_area_decile_thresholds: passed through to prepare_model_data_row
+    :return: tuple of (results_df, scoring_data, nodata) where results_df has one row per property with an EPC,
+        scoring_data is the list of model input rows and nodata holds the rows for which no EPC was found
+    """
+    scoring_data = []
+    results = []
+    nodata = []
+
+    # Maps the asset list's "T1_AssetType" values onto EPC property-type / built-form values
+    property_type_lookup = {
+        "Flat": {"property-type": "Flat", "built-form": None},
+        "Mid Terrace House": {"property-type": "House", "built-form": "Mid-Terrace"},
+        "End Terrace House": {"property-type": "House", "built-form": "End-Terrace"},
+        "Maisonnette": {"property-type": "Flat", "built-form": None},
+        "Semi Detached House": {"property-type": "House", "built-form": "Semi-Detached"},
+        "Detached House": {"property-type": "House", "built-form": "Detached"},
+        "Coach House": {"property-type": "House", "built-form": "Detached"},
+        "Bungalow": {"property-type": "Bungalow", "built-form": None},
+        "Detached Bungalow": {"property-type": "Bungalow", "built-form": "Detached"},
+        "House": {"property-type": "House", "built-form": None},
+        "Semi Detached Bung": {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+        "Bedspace": {"property-type": None, "built-form": None},
+        "Office Buildings": {"property-type": None, "built-form": None},
+        "End Terrace Bungalow": {"property-type": "Bungalow", "built-form": "End-Terrace"},
+        "Mid Terrace Bungalow": {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+        "Bedsit": {"property-type": "Flat", "built-form": None},
+        "Mid Terrace Housekeeping": {"property-type": "House", "built-form": "Mid-Terrace"},
+        "Mid Terrace Housekeeping ": {"property-type": "House", "built-form": "Mid-Terrace"},
+        "End Terrace Housex": {"property-type": "House", "built-form": "End-Terrace"},
+        "Guest Room": {"property-type": None, "built-form": None}
+    }
+
+    # Iterating a DataFrame directly yields its column labels, not its rows, so rows must come from iterrows().
+    # Anything that is not a DataFrame is assumed to already yield (index, row) pairs.
+    rows = data.iterrows() if isinstance(data, pd.DataFrame) else data
+
+    for _, property_meta in tqdm(rows, total=len(data)):
+
+        searcher = SearchEpc(
+            address1=property_meta["HouseNo"],
+            postcode=property_meta["postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            full_address=property_meta["address"]
+        )
+        # An unknown asset type raises KeyError here
+        asset_type = property_type_lookup[property_meta["T1_AssetType"]]
+        searcher.ordnance_survey_client.property_type = asset_type["property-type"]
+        searcher.ordnance_survey_client.built_form = asset_type["built-form"]
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta)
+            continue
+
+        if searcher.newest_epc.get("estimated"):
+            # We insert the row ID as our proxy for UPRN
+            proxy_uprn = int(property_meta["row_id"].split("_")[1])
+            searcher.newest_epc["uprn"] = proxy_uprn
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+        # We also want to get the penultimate epc
+        # penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        # if not penultimate_epc:
+        #     penultimate_epc = newest_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        # if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+        #     eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+        #     eligibility.check_gbis_warmfront()
+        #     eligibility.check_eco4_warmfront()
+        #     # If this is the case, we need to update the older epcs
+        #     # We don't update just to make data cleaning easier
+        #     if penultimate_epc.get("estimated") is None:
+        #         older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+            eligibility.walls["is_cavity_wall"] and
+            eligibility.walls["is_filled_cavity"] and
+            eligibility.loft["suitability"] and
+            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+        # Full checks
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        if eligibility.eco4_warmfront["eligible"]:
+            # Fill the gaps the model cannot cope with before building the scoring rows
+            if eligibility.epc["uprn"] in ["", None]:
+                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+
+            if eligibility.epc["construction-age-band"] in ["", None]:
+                eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])
+
+            # This is not the right place to do this but this is temp
+            if eligibility.epc["extension-count"] in ["", None]:
+                eligibility.epc["extension-count"] = 0
+
+            # Not in the right place but temp
+            if eligibility.epc["built-form"] in ["", None]:
+                if not older_epcs:
+                    eligibility.epc["built-form"] = "Mid-Terrace"
+
+            scoring_dictionary = prepare_model_data_row(
+                property_id=property_meta["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds,
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        results.append(
+            {
+                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
+                "Address": property_meta["T1_Address"],
+                "Postcode": property_meta["postcode"],
+                "property_type": eligibility.epc["property-type"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "cavity_type": eligibility.cavity["type"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                "cavity_age": cavity_age,
+                **eligibility.walls,
+                **eligibility.roof,
+            }
+        )
+
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    # The uplift per property is the sum over its prediction rows of (predicted SAP - current SAP)
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+
+        # Properties with a current SAP of 38 or below are judged against lower post-install thresholds
+        # (53/55/57) than the rest (67/69/71) - presumably band D versus band C; confirm against scheme rules
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+    return results_df, scoring_data, nodata
+
+
+def get_epc_data_for_lost_surveys(
+    lost_identified_properties, cleaned, cleaning_data, created_at, photo_supply_lookup,
+    floor_area_decile_thresholds
+):
+    """
+    Look up EPC data and assess eligibility for surveyed properties that could not be matched to the asset list.
+
+    A "row_id" column of the form "lost_surveys_ha25_<position>" is written onto the frame that is passed in.
+    Rows whose "POSTCODE" is None are skipped entirely (they appear in neither the results nor nodata).
+
+    :param lost_identified_properties: DataFrame of unmatched survey rows; read for "NO", "ADDRESS 1",
+        "ADDRESS 2", "ADDRESS 3", "POSTCODE" and "PROPERTY TYPE"
+    :param cleaned: passed through to Eligibility and prepare_model_data_row
+    :param cleaning_data: passed through to prepare_model_data_row and DataProcessor.apply_averages_cleaning
+    :param created_at: timestamp passed to prepare_model_data_row and ModelApi
+    :param photo_supply_lookup: passed through to prepare_model_data_row
+    :param floor_area_decile_thresholds: passed through to prepare_model_data_row
+    :return: tuple of (results_df, scoring_data, nodata)
+    """
+    lost_identified_properties["row_id"] = [
+        "lost_surveys_ha25_" + str(i) for i in range(0, len(lost_identified_properties))
+    ]
+
+    scoring_data = []
+    results = []
+    nodata = []
+
+    # Maps the survey's "PROPERTY TYPE" values onto EPC property-type / built-form values
+    property_type_lookup = {
+        "MID-TERRACE": {"property-type": "House", "built-form": "Mid-Terrace"},
+        "N/A": {"property-type": "House", "built-form": None},
+        "END-TERRACE": {"property-type": "House", "built-form": "End-Terrace"},
+        "GROUND-FLOOR": {"property-type": "House", "built-form": None},
+        "TOP-FLOOR": {"property-type": "House", "built-form": None},
+        "SEMI-DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
+        "MID-FLOOR": {"property-type": "House", "built-form": None},
+        "TOP-FLOOR FLAT": {"property-type": "House", "built-form": None},
+        "DETACHED": {"property-type": "House", "built-form": "Detached"},
+        "MID-FLOOR FLAT": {"property-type": "House", "built-form": None},
+        "SEMI- DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
+        "NO EPC ON GOV": {"property-type": "House", "built-form": None},
+        "Top-floor flat": {"property-type": "House", "built-form": None},
+        "GROUND-FLOOR FLAT": {"property-type": "House", "built-form": None},
+        "NOT ON GOV SITE": {"property-type": "House", "built-form": None}
+    }
+
+    for _, property_meta in tqdm(lost_identified_properties.iterrows(), total=len(lost_identified_properties)):
+
+        # NOTE(review): this only catches None; a missing value stored as NaN would pass - confirm upstream
+        if property_meta["POSTCODE"] is None:
+            continue
+
+        full_address = ", ".join(
+            [str(x) for x in [
+                property_meta["NO"], property_meta["ADDRESS 1"], property_meta["ADDRESS 2"], property_meta["ADDRESS 3"]
+            ] if x is not None]
+        )
+
+        searcher = SearchEpc(
+            address1=str(property_meta["NO"]),
+            postcode=property_meta["POSTCODE"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            full_address=full_address
+        )
+
+        property_type_key = property_meta["PROPERTY TYPE"]
+        if property_type_key is not None:
+            # An unknown property type raises KeyError here
+            survey_type = property_type_lookup[property_type_key.strip()]
+            searcher.ordnance_survey_client.property_type = survey_type["property-type"]
+            searcher.ordnance_survey_client.built_form = survey_type["built-form"]
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta)
+            continue
+
+        if searcher.newest_epc.get("estimated"):
+            # We insert the row ID as our proxy for UPRN
+            proxy_uprn = int(property_meta["row_id"].split("_")[-1])
+            searcher.newest_epc["uprn"] = proxy_uprn
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+        # We also want to get the penultimate epc
+        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        if not penultimate_epc:
+            penultimate_epc = newest_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        # When the newest EPC qualifies for neither scheme, fall back to the penultimate EPC
+        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+            eligibility.check_gbis_warmfront()
+            eligibility.check_eco4_warmfront()
+            # If this is the case, we need to update the older epcs
+            # We don't update just to make data cleaning easier
+            if penultimate_epc.get("estimated") is None:
+                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # Full checks
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        # Only properties with a known construction age band are scored
+        if eligibility.eco4_warmfront["eligible"] and (eligibility.epc["construction-age-band"] not in ["", None]):
+            if eligibility.epc["uprn"] in ["", None]:
+                # The row id is "lost_surveys_ha25_<n>", so the number is the LAST "_"-separated part
+                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[-1])
+
+            scoring_dictionary = prepare_model_data_row(
+                property_id=property_meta["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds,
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        results.append(
+            {
+                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
+                "Address": property_meta["ADDRESS 1"],
+                "Postcode": property_meta["POSTCODE"],
+                "property_type": eligibility.epc["property-type"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "cavity_type": eligibility.cavity["type"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                **eligibility.walls,
+                **eligibility.roof,
+            }
+        )
+
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    # The uplift per property is the sum over its prediction rows of (predicted SAP - current SAP)
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+
+        # Properties with a current SAP of 38 or below are judged against lower post-install thresholds
+        # (53/55/57) than the rest (67/69/71) - presumably band D versus band C; confirm against scheme rules
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+    return results_df, scoring_data, nodata
+
+
+def analyse_results(results_df, data, eco4_prospects_survey_list):
+    """
+    Join the eligibility results onto the asset list and the survey list, then slice out the groups of interest.
+
+    Nothing is returned: the filtered frames only exist as local variables.
+
+    :param results_df: per-property results, joined on "row_id"
+    :param data: asset list providing "row_id", "survey_key" and "warmfront_identified"
+    :param eco4_prospects_survey_list: survey list providing "survey_key", "ADDRESS 1", "NO" and "POSTCODE"
+    """
+    survey_columns = eco4_prospects_survey_list[["survey_key", "ADDRESS 1", "NO", "POSTCODE"]]
+    analysis_data = (
+        data[["row_id", "survey_key", "warmfront_identified"]]
+        .merge(results_df, how="left", on="row_id")
+        .merge(survey_columns, how="left", on="survey_key")
+    )
+
+    # NEW
+    # Replace missing thickness values with None before converting the descriptions to numbers
+    raw_thickness = analysis_data["roof_insulation_thickness"]
+    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
+    )
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"] == True]  # 2204
+
+    # Because we don't know which property is for which scheme, we'll just look at what we found
+    is_eco4_eligible = analysis_data["eco4_eligible"] == True
+    has_thin_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
+    has_low_sap = analysis_data["sap"] <= 54
+    ideal_eco4 = analysis_data[is_eco4_eligible & has_thin_roof_insulation & has_low_sap]  # 335
+
+    # GBIS candidates are the GBIS-eligible rows that were not already picked up as ideal ECO4
+    is_gbis_eligible = analysis_data["gbis_eligible"] == True
+    already_ideal_eco4 = analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
+    gbis = analysis_data[is_gbis_eligible & ~already_ideal_eco4]
+
+    ideal_eco4 = ideal_eco4[ideal_eco4["sap"] <= 54]
+
+
+def analyse_lost_surveys(results_df):
+    """
+    Slice the lost-survey results into ideal ECO4 candidates and the remaining GBIS candidates.
+
+    The "roof_insulation_thickness" column is rewritten and a "roof_insulation_thickness_numeric" column is
+    added on the frame that is passed in. Nothing is returned: the filtered frames only exist as local variables.
+
+    :param results_df: per-property results for the lost surveys
+    """
+    # Replace missing thickness values with None before converting the descriptions to numbers
+    raw_thickness = results_df["roof_insulation_thickness"]
+    results_df["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
+    results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply(
+        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
+    )
+
+    is_eco4_eligible = results_df["eco4_eligible"] == True
+    has_thin_roof_insulation = results_df["roof_insulation_thickness_numeric"] <= 100
+    has_low_sap = results_df["sap"] <= 54
+    ideal_eco4 = results_df[is_eco4_eligible & has_thin_roof_insulation & has_low_sap]  # 25
+
+    # GBIS candidates are the GBIS-eligible rows that were not already picked up as ideal ECO4
+    is_gbis_eligible = results_df["gbis_eligible"] == True
+    already_ideal_eco4 = results_df["row_id"].isin(ideal_eco4["row_id"].values)
+    gbis = results_df[is_gbis_eligible & ~already_ideal_eco4]  # 82
+
+
+def app():
+    """
+    Run the HA25 eligibility pipeline: load the asset and survey data, fetch the supporting datasets from S3,
+    run get_epc_data, and then replace its outputs with the contents of a previously pickled run.
+
+    Nothing is returned.
+    """
+    data, eco4_prospects_survey_list, lost_identified_properties = load_data()
+
+    # Row ids take the form "ha25_<position>"
+    data["row_id"] = ["ha25_" + str(i) for i in range(0, len(data))]
+
+    # The cleaned EPC lookup is stored as a msgpack-encoded blob
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    created_at = datetime.now().isoformat()
+
+    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+    results_df, scoring_data, nodata = get_epc_data(
+        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
+    )
+    # Pickle the outputs
+    # Old data was ha25.pickle
+    # import pickle
+    # with open("ha25_10_jan.pickle", "wb") as f:
+    #     pickle.dump(
+    #         {
+    #             "results_df": results_df,
+    #             "scoring_data": scoring_data,
+    #             "nodata": nodata
+    #         },
+    #         f
+    #     )
+
+    # Load in pickle
+    # NOTE(review): this overwrites the results computed by get_epc_data above with a saved run, and the path
+    # is relative to the working directory. pickle.load can execute arbitrary code, so only load trusted files.
+    import pickle
+    with open("ha25_10_jan.pickle", "rb") as f:
+        saved = pickle.load(f)
+    results_df = saved["results_df"]
+    scoring_data = saved["scoring_data"]
+    nodata = saved["nodata"]
--- a/etl/eligibility/ha_15_32/ha33_app.py
+++ b/etl/eligibility/ha_15_32/ha33_app.py
@ -0,0 +1,326 @@
+import msgpack
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from backend.app.utils import read_parquet_from_s3
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+
+import re
+
+# The .env file is expected alongside this module in etl/eligibility/ha_15_32. __file__ already lives in
+# that directory, so appending "etl/eligibility/ha_15_32" again pointed at a nested path that load_dotenv
+# silently skips when it does not exist.
+ENV_FILE = Path(__file__).resolve().parent / ".env"
+
+logger = setup_logger()
+load_dotenv(ENV_FILE)
+
+
+def load_ha_33():
+    """
+    Read the four HA33 asset CSV exports and stack them into a single DataFrame.
+
+    Side effect: widens the pandas display options (max rows, max columns and width).
+    Columns whose header contains "Unnamed:" are dropped from each part before stacking.
+    :return: the concatenated DataFrame; every part keeps its own row index
+    """
+    display_options = (
+        ('display.max_rows', 500),
+        ('display.max_columns', 500),
+        ('display.width', 1000),
+    )
+    for option_name, option_value in display_options:
+        pd.set_option(option_name, option_value)
+
+    def _read_part(file_name):
+        # One export file, minus the header-less columns pandas labels "Unnamed: <n>"
+        frame = pd.read_csv(f"etl/eligibility/ha_15_32/{file_name}", low_memory=False)
+        unnamed_columns = [column for column in frame.columns if "Unnamed:" in column]
+        return frame.drop(columns=unnamed_columns)
+
+    file_names = (
+        "HA 33 Assets 1 of 4.csv",
+        "HA 33 Assets 2 of 4.csv",
+        "HA 33 Assets 3 of 4.csv",
+        "HA 33 Assets 4 of 4.csv",
+    )
+
+    return pd.concat([_read_part(file_name) for file_name in file_names])
+
+
+def standardise_ha33(data):
+    """
+    Standardise the raw HA33 asset data ready for the EPC search.
+
+    - drops rows that have no ADDRESS
+    - splits ADDRESS on commas into address1..address5
+    - turns 'FT {number}' / 'FT{number}' in address1 into '{number}'
+    - strips surrounding whitespace from every column name
+
+    :param data: DataFrame with an 'ADDRESS' column
+    :return: a new DataFrame with the address columns appended; the input frame is not modified
+    """
+    address_columns = ['address1', 'address2', 'address3', 'address4', 'address5']
+
+    data = data[~pd.isnull(data["ADDRESS"])]
+
+    # Always produce exactly five address columns: n caps the number of splits (any further commas stay
+    # inside address5) and reindex pads with empty columns when every address is shorter. Naming the raw
+    # split output directly raised a ValueError unless the longest address had exactly five parts.
+    split_addresses = data['ADDRESS'].str.split(',', n=len(address_columns) - 1, expand=True)
+    split_addresses = split_addresses.reindex(columns=range(len(address_columns)))
+    split_addresses.columns = address_columns
+
+    data = pd.concat([data, split_addresses], axis=1)
+    del split_addresses
+
+    # Using regex to replace 'FT {number}' or 'FT{number}', with '{number}'
+    data['address1'] = data['address1'].str.replace(r'FT\s*(\d+)', r'\1', regex=True)
+
+    data.columns = [col.strip() for col in data.columns]
+
+    # TODO: we have 23 THIRTY SEVENTH AVENUE, can we replace THIRTY SEVENTH with 37TH
+
+    return data
+
+
+def _classify_eco4_confidence(sap, post_install_sap):
+    """
+    Grade a predicted post-install SAP score against the thresholds used for ECO4 prospects.
+
+    Properties with a current SAP of 38 or below are graded against 57 / 55 / 53, all others against
+    71 / 69 / 67. A missing (NaN) post-install score fails every comparison and is graded "unlikely".
+    :param sap: current SAP score of the property
+    :param post_install_sap: predicted SAP score after the install
+    :return: "highest confidence", "high confidence", "medium confidence" or "unlikely"
+    """
+    highest, high, medium = (57, 55, 53) if sap <= 38 else (71, 69, 67)
+    if post_install_sap >= highest:
+        return "highest confidence"
+    if post_install_sap >= high:
+        return "high confidence"
+    if post_install_sap >= medium:
+        return "medium confidence"
+    return "unlikely"
+
+
+def get_ha_33data(data, cleaned, cleaning_data, created_at):
+    """
+    Search the newest EPC for every HA33 property, run the GBIS / ECO4 eligibility checks and score the
+    ECO4 (warmfront) eligible properties with the SAP change model.
+
+    :param data: standardised HA33 assets; reads row_id, ADDRESS, address1, address2, POST CODE and
+        PROPERTY TYPE
+    :param cleaned: cleaned EPC lookup handed to Eligibility and prepare_model_data_row
+    :param cleaning_data: dataset used for the averages cleaning of the scoring data
+    :param created_at: timestamp string shared by the scoring rows and the model API
+    :return: tuple of (results_df, scoring_data, nodata) - one results row per property that had an EPC,
+        the raw scoring rows, and the row ids for which the search returned status 204
+    """
+    house_type_lookup = {
+        "Bungalow": "Bungalow",
+        "Flat": "Flat",
+        'House': "House",
+        'Maisonette': "Maisonette",
+        'Flalolflfp mujjjjunjimj': "Flat",
+        'STUDIO': "Flat",
+    }
+
+    # Compiled once; IGNORECASE makes lower-casing the address unnecessary
+    flat_pattern = re.compile(r'flat\s+(\d+)', re.IGNORECASE)
+
+    scoring_data = []
+    results = []
+    nodata = []
+    for _, house in tqdm(data.iterrows(), total=len(data)):
+
+        # Address parts are None/NaN when ADDRESS had fewer comma separated parts, so never call
+        # string methods on them directly
+        address1 = house["address1"].strip() if isinstance(house["address1"], str) else ""
+        address2 = house["address2"].strip() if isinstance(house["address2"], str) else ""
+
+        # If the flat number sits in the second address part, search on that part instead
+        if flat_pattern.search(address2):
+            address1 = address2
+
+        # I.e. just a number - add the next address part so the search has a street to match on
+        if len(address1) <= 3 and address2:
+            address1 = address1 + " " + address2
+
+        searcher = SearchEpc(
+            address1=address1,
+            postcode=house["POST CODE"]
+        )
+
+        response = searcher.search()
+        if response["status"] == 204:
+            nodata.append(house["row_id"])
+            continue
+
+        newest_epc, older_epcs, _ = searcher.retrieve(
+            property_type=house_type_lookup.get(house["PROPERTY TYPE"], None),
+            address=house["ADDRESS"],
+        )
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        # The full gbis and eco4 checks are always run as well; they feed the *_future columns
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        if eligibility.eco4_warmfront["eligible"]:
+            scoring_dictionary = prepare_model_data_row(
+                property_id=house["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        # Every property with an EPC gets a results row, whether or not it is eligible
+        results.append(
+            {
+                "row_id": house["row_id"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+            }
+        )
+
+    # NOTE: app() reloads "ha33_results.pickle"; that file was produced by pickling
+    # {"results": results, "scoring_data": scoring_data, "nodata": nodata} at this point of a previous run.
+
+    scoring_df = pd.DataFrame(scoring_data)
+    # Implement the same process that is being used in the recommendation engine to clean scoring_df
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    # Work out the SAP uplift per property from the SAP predictions
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    # A property can have several prediction rows; their uplifts are summed
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    # The upgrade requirements are dependent on the current SAP, see _classify_eco4_confidence.
+    # The columns are passed explicitly so the merge below still finds "row_id" when no property is
+    # ECO4 eligible (an empty frame built from an empty list has no columns at all).
+    eco4_rows = results_df[results_df["eco4_eligible"] == True]
+    eligibility_assessment = pd.DataFrame(
+        [
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"]),
+            }
+            for _, row in eco4_rows.iterrows()
+        ],
+        columns=["row_id", "eligibility_classification"],
+    )
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+
+    return results_df, scoring_data, nodata
+
+
+def analyse_ha_33(results_df, data):
+    """
+    Exploratory breakdown of the HA33 eligibility results.
+
+    Nothing is returned or stored: the counts and subsets below are only meant to be inspected
+    interactively (debugger / console).
+    :param results_df: results frame produced by get_ha_33data
+    :param data: standardised HA33 asset data, with row_id and PROPERTY TYPE columns
+    :return: None
+    """
+    # Restored: this definition had been commented out while the frame was still used further down,
+    # which raised a NameError
+    results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
+
+    # Property types of the assets for which we found an EPC
+    data[data["row_id"].isin(results_df["row_id"].values)]["PROPERTY TYPE"].value_counts()
+
+    n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
+    n_eco4 = results_df["eco4_eligible"].sum()
+    # GBIS is only counted for properties that are not already ECO4 eligible
+    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()
+
+    eco_eligibile = results_df[results_df["eco4_eligible"]]
+    eco_eligibile["walls"].value_counts()
+    eco_eligibile["roof"].value_counts()
+
+    results_df[results_df["gbis_eligible"] | results_df["eco4_eligible"]]["tenure"].value_counts()
+
+    results_df_social["eligibility_classification"].value_counts()
+
+    # Properties that are not identified today but pass the full ECO4 check
+    future_possibilities_eco = results_df[
+        (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
+        ].copy()
+
+    # Properties that are not identified today and only pass the full GBIS check
+    future_possibilities_gbis = results_df[
+        (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
+            ~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
+        ].copy()
+
+
+def app():
+    """
+    Because HA33 is large, we deal with it separately.
+
+    Loads and standardises the HA33 assets, tags every row with a positional ``h33<n>`` row id, pulls the
+    cleaned EPC lookup and the cleaning dataset from the ``retrofit-data-dev`` bucket, runs
+    ``get_ha_33data`` and finally reloads a previously pickled copy of the outputs from the working
+    directory.
+
+    NOTE(review): the ``results_df`` returned by ``get_ha_33data`` is overwritten by the pickle contents
+    at the end of the function - confirm this is intended.
+    :return: None
+    """
+
+    data = load_ha_33()
+
+    data = standardise_ha33(data)
+    # Row ids are positional, so they are only stable while the input ordering is unchanged
+    data["row_id"] = ["h33" + str(i) for i in range(0, len(data))]
+
+    # The object is stored with a .bson name but is decoded with msgpack
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
+    cleaning_data = read_parquet_from_s3(
+        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    # Single timestamp shared by everything produced in this run
+    created_at = datetime.now().isoformat()
+
+    results_df, _, _ = get_ha_33data(data, cleaned, cleaning_data, created_at)
+
+    # Read in
+    # NOTE(review): pickle.load executes arbitrary code from the file - only ever load pickles produced locally
+    import pickle
+    with open("ha33_results.pickle", "rb") as f:
+        # From here on `data` is the unpickled dict, no longer the asset DataFrame
+        data = pickle.load(f)
+    # Rebuilt from the raw result rows, so columns that get_ha_33data derives after building them
+    # are not present on this frame
+    results_df = pd.DataFrame(data["results"])
+    scoring_data = data["scoring_data"]
+    nodata = data["nodata"]
--- a/etl/eligibility/ha_15_32/ha4_app.py
+++ b/etl/eligibility/ha_15_32/ha4_app.py
@ -0,0 +1,328 @@
+import os
+import msgpack
+from pathlib import Path
+from datetime import datetime
+import numpy as np
+import pandas as pd
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+import re
+
+# The .env file is expected alongside this module in etl/eligibility/ha_15_32. __file__ already lives in
+# that directory, so appending "etl/eligibility/ha_15_32" again pointed at a nested path that load_dotenv
+# silently skips when it does not exist.
+ENV_FILE = Path(__file__).resolve().parent / ".env"
+
+logger = setup_logger()
+load_dotenv(ENV_FILE)
+
+# Read only after load_dotenv has run, otherwise a token that is defined solely in the .env file
+# comes back as None
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def load_ha_4():
+    """
+    Read the HA4 asset list CSV into a DataFrame.
+
+    Side effect: widens the pandas display options (max rows, max columns and width).
+    :return: DataFrame with the contents of "HA 4 Asset List.csv"
+    """
+    display_options = (
+        ('display.max_rows', 500),
+        ('display.max_columns', 500),
+        ('display.width', 1000),
+    )
+    for option_name, option_value in display_options:
+        pd.set_option(option_name, option_value)
+
+    return pd.read_csv("etl/eligibility/ha_15_32/HA 4 Asset List.csv", low_memory=False)
+
+
+def standardise_ha_4(data):
+    """
+    Standardise the raw HA4 asset list ready for the EPC search.
+
+    - drops rows whose Post Code is the placeholder made of two backslashes
+    - removes '{...}' tokens such as {0664} from Location Name and trims the result
+    - applies a known manual correction to one location name
+
+    :param data: DataFrame with 'Location Name' and 'Post Code' columns
+    :return: a cleaned copy; the frame passed in is left untouched
+    """
+    # Remove any unusable postcodes. Filtering (and copying) first means the cleaning below never writes
+    # into the caller's frame.
+    data = data[data["Post Code"] != '\\\\'].copy()
+
+    # Location name contains some strings like {0664} which we remove, then trim whitespace from either
+    # end. Raw string: '\{' is not a valid escape sequence in a normal string literal.
+    data['Location Name'] = data['Location Name'].str.replace(r'\{.*?\}', '', regex=True).str.strip()
+
+    # Some specific replacements
+    data["Location Name"] = np.where(
+        data["Location Name"] == "Calderbrook Pl & Cog La",
+        "Calderbrook Place",
+        data["Location Name"]
+    )
+
+    return data
+
+
+def _classify_eco4_confidence(sap, post_install_sap):
+    """
+    Grade a predicted post-install SAP score against the thresholds used for ECO4 prospects.
+
+    Properties with a current SAP of 38 or below are graded against 57 / 55 / 53, all others against
+    71 / 69 / 67. A missing (NaN) post-install score fails every comparison and is graded "unlikely".
+    :param sap: current SAP score of the property
+    :param post_install_sap: predicted SAP score after the install
+    :return: "highest confidence", "high confidence", "medium confidence" or "unlikely"
+    """
+    highest, high, medium = (57, 55, 53) if sap <= 38 else (71, 69, 67)
+    if post_install_sap >= highest:
+        return "highest confidence"
+    if post_install_sap >= high:
+        return "high confidence"
+    if post_install_sap >= medium:
+        return "medium confidence"
+    return "unlikely"
+
+
+def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    """
+    Search EPCs for every HA4 asset, run the GBIS / ECO4 eligibility checks on the newest EPC of each UPRN
+    found and score the ECO4 (warmfront) eligible ones with the SAP change model.
+
+    :param data: standardised HA4 assets; reads row_id, Address Line 1, Location Name and Post Code
+        (and Archetype when that column exists)
+    :param cleaned: cleaned EPC lookup handed to Eligibility and prepare_model_data_row
+    :param cleaning_data: dataset used for the averages cleaning of the scoring data
+    :param created_at: timestamp string shared by the scoring rows and the model API
+    :param photo_supply_lookup: not used by this function
+    :param floor_area_decile_thresholds: not used by this function
+    :return: tuple of (results_df, scoring_data, nodata) - one results row per UPRN, the raw scoring rows,
+        and the assets for which nothing was found (a row id or the asset as a dict, depending on which
+        search came back empty)
+    """
+    # Same archetype -> EPC property type lookup that get_ha7_data uses; this function referenced the
+    # name without ever defining it.
+    # NOTE(review): confirm the HA4 sheet uses the same archetype labels.
+    property_type_lookup = {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+    }
+
+    scoring_data = []
+    results = []
+    nodata = []
+    # UPRNs already handled. The same UPRN can come back for more than one asset row; processing it twice
+    # duplicated its scoring rows and multiplied its summed sap_uplift further down.
+    seen_uprns = set()
+    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+        # .get() so a sheet without an "Archetype" column simply searches without a property type
+        property_type = property_type_lookup.get(property_meta.get("Archetype"))
+
+        # For many of the entries in this dataset, we're actually given an entire building, so we collect
+        # EPCs for every property in the building
+        searcher = SearchEpc(
+            address1=property_meta["Address Line 1"],
+            postcode=property_meta["Post Code"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            property_type=property_type,
+        )
+
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            # Retry with the location name as the first address line
+            searcher = SearchEpc(
+                address1=property_meta["Location Name"],
+                postcode=property_meta["Post Code"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key=None,
+                property_type=property_type,
+            )
+            searcher.search()
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta["row_id"])
+            continue
+
+        # NOTE(review): this second search, on whichever searcher survived above, is what fills
+        # searcher.data for the building-wide pass below - confirm both lookups are really needed.
+        searcher.search()
+
+        if searcher.data is None:
+            nodata.append(property_meta.to_dict())
+            continue
+
+        epcs = searcher.data["rows"]
+        epcs = pd.DataFrame(epcs)
+
+        # Take the newest EPC by UPRN
+        epcs = epcs.sort_values(by=["lodgement-date"], ascending=False)
+        newest_epcs = epcs.drop_duplicates(subset=["uprn"], keep="first")
+
+        # For each EPC, we now check eligibility
+        for _, epc in newest_epcs.iterrows():
+            if epc["uprn"] in seen_uprns:
+                continue
+            seen_uprns.add(epc["uprn"])
+
+            eligibility = Eligibility(epc=epc.to_dict(), cleaned=cleaned)
+            eligibility.check_gbis_warmfront()
+            eligibility.check_eco4_warmfront()
+
+            # The full gbis and eco4 checks are always run as well; they feed the *_future columns
+            eligibility.check_gbis()
+            eligibility.check_eco4()
+
+            if eligibility.eco4_warmfront["eligible"]:
+                # Older EPCs lodged for the same UPRN
+                old_data = epcs[
+                    (epcs["uprn"] == epc["uprn"]) &
+                    (epcs["lmk-key"] != epc["lmk-key"])
+                    ].to_dict("records")
+
+                full_sap_epc = epcs[
+                    (epcs["uprn"] == epc["uprn"]) &
+                    (epcs["transaction-type"] == "new dwelling")
+                    ].to_dict("records")
+
+                scoring_dictionary = prepare_model_data_row(
+                    property_id=eligibility.epc["uprn"],
+                    modelling_epc=eligibility.epc,
+                    cleaned=cleaned,
+                    cleaning_data=cleaning_data,
+                    created_at=created_at,
+                    old_data=old_data,
+                    full_sap_epc=full_sap_epc
+                )
+                scoring_data.extend(scoring_dictionary)
+
+            results.append(
+                {
+                    "uprn": epc["uprn"],
+                    "Location Name": property_meta["Location Name"],
+                    "Post Code": property_meta["Post Code"],
+                    "property_type": eligibility.epc["property-type"],
+                    "gbis_eligible": eligibility.gbis_warmfront,
+                    "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                    "eco4_message": eligibility.eco4_warmfront["message"],
+                    "sap": float(eligibility.epc["current-energy-efficiency"]),
+                    "gbis_eligible_future": eligibility.gbis["eligible"],
+                    "gbis_eligible_future_message": eligibility.gbis["message"],
+                    "eco4_eligible_future": eligibility.eco4["eligible"],
+                    "eco4_eligible_future_message": eligibility.eco4["message"],
+                    # Property components
+                    "roof": eligibility.roof["clean_description"],
+                    "walls": eligibility.walls["clean_description"],
+                    "cavity_type": eligibility.cavity["type"],
+                    "heating": eligibility.epc["mainheat-description"],
+                    "tenure": eligibility.tenure,
+                    "date_epc": eligibility.epc["lodgement-date"],
+                }
+            )
+
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+
+    # NOTE(review): the portfolio id still says "ha33" although this is the HA4 run - confirm before
+    # relying on where the predictions are stored.
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    predictions = predictions.rename(columns={"property_id": "uprn"}).merge(
+        results_df[["uprn", "sap"]], how="left", on="uprn"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    # A property can have several prediction rows; their uplifts are summed
+    predictions = predictions.groupby("uprn")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "uprn"]],
+        how="left",
+        on="uprn"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    results_df = results_df[~pd.isnull(results_df["uprn"])]
+
+    # The upgrade requirements are dependent on the current SAP, see _classify_eco4_confidence.
+    # The columns are passed explicitly so the merge below still finds "uprn" when no property is
+    # ECO4 eligible (an empty frame built from an empty list has no columns at all).
+    eco4_rows = results_df[results_df["eco4_eligible"] == True]
+    eligibility_assessment = pd.DataFrame(
+        [
+            {
+                "uprn": row["uprn"],
+                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"]),
+            }
+            for _, row in eco4_rows.iterrows()
+        ],
+        columns=["uprn", "eligibility_classification"],
+    )
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="uprn"
+    )
+    # Kept as a safeguard: UPRNs are already de-duplicated while the results are collected
+    results_df = results_df.drop_duplicates(subset=["uprn"])
+
+    return results_df, scoring_data, nodata
+
+
+def analyse_ha_4(results_df, data):
+    """
+    Exploratory breakdown of the HA4 eligibility results.
+
+    Nothing is returned or stored: the counts and subsets below are only meant to be inspected
+    interactively (debugger / console).
+    :param results_df: results frame produced by get_ha_4_data
+    :param data: not used by this function
+    :return: None
+    """
+    is_gbis = results_df["gbis_eligible"]
+    is_eco4 = results_df["eco4_eligible"]
+    identified_now = is_gbis | is_eco4
+
+    identified_count = identified_now.sum()
+    eco4_count = is_eco4.sum()
+    # GBIS is only counted for properties that are not already ECO4 eligible
+    gbis_only_count = results_df[~is_eco4]["gbis_eligible"].sum()
+
+    eco4_rows = results_df[is_eco4]
+    eco4_rows["eligibility_classification"].value_counts()
+
+    passes_full_eco4 = results_df["eco4_eligible_future"] == True
+    fails_full_eco4 = results_df["eco4_eligible_future"] == False
+    passes_full_gbis = results_df["gbis_eligible_future"] == True
+
+    # Not identified today, but passing the full ECO4 check
+    eco4_future_rows = results_df[passes_full_eco4 & (~identified_now)].copy()
+
+    # Not identified today, passing only the full GBIS check
+    gbis_future_rows = results_df[passes_full_gbis & fails_full_eco4 & (~identified_now)].copy()
+
+    future_total = eco4_future_rows.shape[0] + gbis_future_rows.shape[0]
+
+
+def app():
+    """
+    Driver for the HA4 eligibility run.
+
+    Loads and standardises the HA4 asset list, tags every row with a positional ``h4<n>`` row id, pulls
+    the cleaned EPC lookup, the cleaning dataset and the solar lookups from the ``retrofit-data-dev``
+    bucket and runs ``get_ha_4_data``. The outputs are only held in local variables; the pickle
+    save / load snippets at the end are commented out.
+    :return: None
+    """
+    data = load_ha_4()
+
+    data = standardise_ha_4(data)
+
+    # Row ids are positional, so they are only stable while the input ordering is unchanged
+    data["row_id"] = ["h4" + str(i) for i in range(0, len(data))]
+
+    # The object is stored with a .bson name but is decoded with msgpack
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    # Single timestamp shared by everything produced in this run
+    created_at = datetime.now().isoformat()
+
+    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+    results_df, scoring_data, nodata = get_ha_4_data(
+        data=data,
+        cleaned=cleaned,
+        cleaning_data=cleaning_data,
+        created_at=created_at,
+        photo_supply_lookup=photo_supply_lookup,
+        floor_area_decile_thresholds=floor_area_decile_thresholds
+    )
+
+    # Store the data locally as a pickle
+    # import pickle
+    # with open("ha_4.pickle", "wb") as f:
+    #     pickle.dump(
+    #         {
+    #             "results_df": results_df,
+    #             "scoring_data": scoring_data,
+    #             "nodata": nodata
+    #         }, f)
+
+    # Read in
+    # import pickle
+    # with open("ha_4.pickle", "rb") as f:
+    #     data = pickle.load(f)
+    # results_df = data["results_df"]
+    # scoring_data = data["scoring_data"]
+    # nodata = data["nodata"]
--- a/etl/eligibility/ha_15_32/ha7_app.py
+++ b/etl/eligibility/ha_15_32/ha7_app.py
@ -0,0 +1,383 @@
+import os
+import msgpack
+import openpyxl
+from openpyxl.styles.colors import COLOR_INDEX
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+# The .env file is expected alongside this module in etl/eligibility/ha_15_32. __file__ already lives in
+# that directory, so appending "etl/eligibility/ha_15_32" again pointed at a nested path that load_dotenv
+# silently skips when it does not exist.
+ENV_FILE = Path(__file__).resolve().parent / ".env"
+
+logger = setup_logger()
+load_dotenv(ENV_FILE)
+
+# Read after load_dotenv so values defined only in the .env file are picked up
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+OS_API_KEY = os.getenv("ORDNANCE_SURVEY_API_KEY")
+
+
+def load_data():
+    """
+    Load the HA7 asset list from the excel, keeping the fill colour of each row.
+
+    The fill colour of the first cell of every row is stored in ``row_color`` and translated into
+    ``row_colour_name`` and ``row_code``. Any colour that is not one of the two recognised values -
+    including rows without a fill, whose ``row_color`` is None - falls into the "yellow" /
+    "needs criteria change" bucket.
+    :return: DataFrame with the sheet contents plus row_color, row_colour_name and row_code
+    """
+
+    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 7 ASSET LIST.xlsx')
+    sheet = workbook.active
+
+    # Prepare lists to collect rows data and their colors
+    rows_data = []
+    rows_colors = []
+    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        rows_data.append([cell.value for cell in row])  # This will get you the cell values
+
+        color_index = row[0].fill.start_color.index
+        if color_index == '00000000':
+            # No fill. Looking None up in COLOR_INDEX raised a TypeError, so keep None as the colour.
+            row_color = None
+        elif isinstance(color_index, int):
+            # Palette position, translate it to its colour string
+            row_color = COLOR_INDEX[color_index]
+        else:
+            # Already a colour string, which cannot be used to index COLOR_INDEX
+            row_color = color_index
+        rows_colors.append(row_color)
+
+    df = pd.DataFrame(rows_data, columns=[cell.value for cell in sheet[1]])
+
+    # Add the row colors as a new column
+    df['row_color'] = rows_colors
+
+    # The ninth column is renamed by position. Assigning a fresh list of labels avoids writing into the
+    # Index's underlying array in place.
+    column_names = list(df.columns)
+    column_names[8] = "is_active"
+    df.columns = column_names
+
+    # Remove None columns
+    df = df.dropna(axis=1, how='all')
+
+    # We now parse the colours
+    # NOTE(review): the two colour strings below are the values found in this workbook for the rows
+    # treated as "red" and "green" - confirm against the sheet before reusing the mapping elsewhere.
+    df["row_colour_name"] = np.where(
+        df["row_color"] == "0000FFFF", "red",
+        np.where(df["row_color"] == "00FF00FF", "green", "yellow")
+    )
+    df["row_code"] = np.where(
+        df["row_colour_name"] == "red", "invalid",
+        np.where(df["row_colour_name"] == "green", "potential ECO4", "needs criteria change")
+    )
+
+    return df
+
+
+def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    property_type_lookup = {
+        # "Mid Terrace": "Mid-Terrace",
+        # "End Terrace": "End-Terrace",
+        # "Semi Detached": "Semi-Detached",
+        # "Detached": "Detached",
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+    }
+
+    scoring_data = []
+    results = []
+    nodata = []
+    for _, house in tqdm(data.iterrows(), total=len(data)):
+
+        if house["Address"]:
+            address = house["Address"]
+        else:
+            address = house["Address2"]
+
+        searcher = SearchEpc(
+            address1=address,
+            postcode=house["Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            property_type=property_type_lookup.get(house["Archetype"]),
+        )
+
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            nodata.append(house["row_id"])
+            continue
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+            eligibility.walls["is_cavity_wall"] and
+            eligibility.walls["is_filled_cavity"] and
+            eligibility.loft["suitability"] and
+            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+        # If the house is not identified, we do a full gbis and eco4 check
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        if eligibility.eco4_warmfront["eligible"]:
+            scoring_dictionary = prepare_model_data_row(
+                property_id=house["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        # If nothing is eligible or gbis is eligible, then we make a record this
+        results.append(
+            {
+                "row_id": house["row_id"],
+                "address": house["Address"],
+                "postcode": house["Postcode"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                **newest_epc,
+                "cavity_age": cavity_age,
+                **eligibility.walls,
+                **eligibility.roof,
+            }
+        )
+
+    scoring_df = pd.DataFrame(scoring_data)
+    # Implement the same process that is being used in the recommendation engine to cleaning scoring_df
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+
+        # If the property is an F or G (SAP <= 38), it only needs to upgrade to a D (SAP >= 55)
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+
+    return results_df, scoring_data, nodata
+
+
+def analyse_ha_7(results_df, data):
+    """
+    Exploratory breakdown of the HA7 eligibility results.
+
+    Every table and count built below is either bound to a local name or discarded, and there is no return
+    statement, so the function returns None. Neither input frame is assigned to; all column writes go to the
+    merged copy, analysis_data.
+    NOTE(review): presumably meant to be stepped through interactively - confirm before relying on it.
+
+    :param results_df: Eligibility results; must contain row_id plus the eligibility, roof and EPC columns
+                       referenced below
+    :param data: Source frame providing row_id, row_code, "Property Type" and "Construction Year Band"
+    """
+    # Attach the source columns used for the breakdowns to the eligibility results
+    analysis_data = results_df.merge(
+        data[["row_id", "row_code", "Property Type", "Construction Year Band"]], how="left", on="row_id"
+    )
+
+    # Result is not stored
+    analysis_data["row_code"].value_counts()
+
+    # NEW
+
+    # Replace missing thickness values with None before converting them to a numeric value
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    # ECO4 eligible with a numeric roof insulation thickness of at most 100
+    ideal_eco4 = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (
+            analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    # ECO4 eligible with a numeric roof insulation thickness above 100
+    secondary_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (
+            analysis_data["roof_insulation_thickness_numeric"] > 100)
+        ]
+
+    # underperforming cavities
+    # NOTE(review): cavity_age is compared against 9 * 365, so it is presumably measured in days - confirm
+    underperforming_cavities = analysis_data[
+        (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
+            analysis_data["cavity_age"] > 9 * 365
+        ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    # GBIS eligible but not ECO4 eligible
+    identified_gbis_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (
+            analysis_data["eco4_eligible"] == False
+        )
+        ]
+
+    # Rows whose row_code in the source data is "potential ECO4"
+    wf_identified = analysis_data[
+        (analysis_data["row_code"] == "potential ECO4")
+    ]
+
+    # END NEW
+
+    warmfront_identification = analysis_data["row_code"].value_counts()
+    # Same filter as wf_identified above
+    warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
+    # Result is not stored
+    warmfront_identified["walls"].value_counts(normalize=True)
+
+    # Result is not stored
+    analysis_data["Construction Year Band"].value_counts(normalize=True)
+
+    # Number of days from today
+
+    # Days between each row's date_epc and the current time
+    days_to_today = (datetime.now() - pd.to_datetime(warmfront_identified["date_epc"])).dt.days
+    # Result is not stored
+    days_to_today.mean()
+
+    property_types = analysis_data["Property Type"].value_counts()
+
+    # Rows flagged for either scheme
+    n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
+
+    eco_identified = results_df[results_df["eco4_eligible"]]
+    n_eco4 = eco_identified["eco4_eligible"].sum()
+    # GBIS is only counted where the row is not ECO4 eligible
+    gbis_identified = results_df[~results_df["eco4_eligible"] & results_df["gbis_eligible"]]
+    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()
+
+    # Same filter as eco_identified above; the value_counts result is not stored
+    eco_eligibile = results_df[results_df["eco4_eligible"]]
+    eco_eligibile["eligibility_classification"].value_counts()
+
+    # Rows not flagged by either current check that are ECO4 eligible under the "future" columns
+    future_possibilities_eco = results_df[
+        (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
+        ].copy()
+
+    # Rows not flagged by either current check that are GBIS (but not ECO4) eligible under the "future" columns
+    future_possibilities_gbis = results_df[
+        (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
+            ~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
+        ].copy()
+
+    total_future_possibilities = future_possibilities_eco.shape[0] + future_possibilities_gbis.shape[0]
+
+
+def app():
+    """
+    Load the HA7 input data and the supporting datasets from S3, then run the eligibility pipeline.
+
+    Nothing is returned; the outputs of get_ha7_data are only bound to local names.
+    """
+    data = load_data()
+    # Synthetic identifier per input row: ha70, ha71, ...
+    data["row_id"] = [f"ha7{position}" for position in range(len(data))]
+
+    # Cleaned EPC lookup is stored as a msgpack payload
+    cleaned_payload = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev",
+    )
+    cleaned = msgpack.unpackb(cleaned_payload, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev",
+        file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    solar_inputs = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+    photo_supply_lookup, floor_area_decile_thresholds = solar_inputs
+
+    created_at = datetime.now().isoformat()
+
+    results_df, scoring_data, nodata = get_ha7_data(
+        data,
+        cleaned,
+        cleaning_data,
+        created_at,
+        photo_supply_lookup,
+        floor_area_decile_thresholds,
+    )
+
+    # Pickle results
+    # import pickle
+    # with open("ha7_results_jan_10.pkl", "wb") as f:
+    #     pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)
+
+    # Read in the old data
+    # import pickle
+    # with open("ha7_results_jan_10.pkl", "rb") as f:
+    #     old_data = pickle.load(f)
+    # results_df = old_data["results_df"]
+    # scoring_data = old_data["scoring_data"]
+    # nodata = old_data["nodata"]
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
--- a/etl/eligibility/ha_15_32/requirements.txt
+++ b/etl/eligibility/ha_15_32/requirements.txt
@ -0,0 +1,11 @@
+pandas
+pydantic==1.10.11
+epc-api-python==1.0.2
+msgpack
+tqdm
+python-dotenv
+boto3
+textblob
+pyarrow==12.0.1
+fuzzywuzzy
+python-Levenshtein
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@ -5,6 +5,10 @@ from BaseUtility import Definitions
 from etl.epc.settings import (
    DATA_PROCESSOR_SETTINGS,
    EARLIEST_EPC_DATE,
+    IGNORED_TRANSACTION_TYPES,
+    IGNORED_FLOOR_LEVELS,
+    IGNORED_PROPERTY_TYPES,
+    IGNORED_TENURES,
    FULLY_GLAZED_DESCRIPTIONS,
    AVERAGE_FIXED_FEATURES,
    BUILT_FORM_REMAP,
@ -24,8 +28,14 @@ from recommendations.rdsap_tables import FLOOR_LEVEL_MAP

 from typing import List

+# TODO: change the setting columns to lower
+STARTING_SUFFIX_COMPONENT_COLS = [x.lower() for x in STARTING_SUFFIX_COMPONENT_COLS]
+NO_SUFFIX_COMPONENT_COLS = [x.lower() for x in NO_SUFFIX_COMPONENT_COLS]
+ENDING_SUFFIX_COMPONENT_COLS = [x.lower() for x in ENDING_SUFFIX_COMPONENT_COLS]
+POTENTIAL_COLUMNS = [x.lower() for x in POTENTIAL_COLUMNS]
+
 # These lookups are used to clean the construction age band
-bounds_map = {
+construction_age_bounds_map = {
    "England and Wales: before 1900": {"l": 0, "u": 1899},
    "England and Wales: 1930-1949": {"l": 1930, "u": 1949},
    "England and Wales: 1900-1929": {"l": 1900, "u": 1929},
@ -40,13 +50,13 @@ bounds_map = {
    "England and Wales: 2012 onwards": {"l": 2012, "u": 3000},
 }

-remap = {
+construction_age_remap = {
    "England and Wales: 2007 onwards": "England and Wales: 2007-2011"
 }

 expanded_map = {
    i: [
-        label for label, bounds in bounds_map.items() if (i <= bounds["u"]) and (i >= bounds['l'])
+        label for label, bounds in construction_age_bounds_map.items() if (i <= bounds["u"]) and (i >= bounds['l'])
    ][0] for i in range(0, 3001)
 }

@ -59,26 +69,205 @@ def is_int(x):
        return False


-class DataProcessor:
+class EPCDataProcessor:
    """
    Handle data loading and data preprocessing
    """

-    def __init__(self, filepath: Path | None, newdata: bool = False) -> None:
+    def __init__(self, data: pd.DataFrame | None = None, cleaning_averages: pd.DataFrame | None = None,
+                 run_mode: str = "training", violation_mode: bool = False) -> None:
        """
        :param filepath: If specified, is the physical location of the data
-        :param newdata: Indicates if we are processing new, testing data.
+        :param is_newdata: Indicates if we are processing new, testing data.
                        In this instance, there are some operations we do not
                        want to perform, such as confine_data()
        """
-        self.filepath = filepath
-        self.data = None
-        self.newdata = newdata
+        is_data_a_dataframe = isinstance(data, pd.DataFrame)
+        self.data: pd.DataFrame = data if is_data_a_dataframe else pd.DataFrame()

-    def load_data(self, low_memory=False) -> None:
-        if not self.filepath:
+        is_cleaning_averages_a_dataframe = isinstance(cleaning_averages, pd.DataFrame)
+        self.cleaning_averages: pd.DataFrame = cleaning_averages if is_cleaning_averages_a_dataframe else pd.DataFrame()
+
+        # FOR NOW IF VIOLATION MODE IS ON, WE USE RUN MODE AS NEWDATA
+        self.violation_mode = violation_mode
+        if run_mode not in ["training", "newdata"]:
+            raise ValueError("Run mode must be either training or newdata")
+        self.run_mode = run_mode if not violation_mode else "newdata"
+
+    def prepare_data(self, filepath: Path | str | None = None) -> None:
+        """
+        Run the cleaning pipeline over self.data, applying the steps relevant to the run mode.
+
+        Steps that take ignore_step return without doing anything when the run mode is "newdata".
+        On completion the column names of self.data are lower case.
+
+        :param filepath: Optional location of a CSV file to load into self.data before processing. When
+                         omitted, the data already held on the instance is processed.
+        :raises ValueError: If there are no rows to process once any load has been attempted
+        """
+
+        # Steps that are only needed for training data are skipped in newdata mode
+        ignore_step = self.run_mode == "newdata"
+
+        if filepath is not None:
+            self.load_data(filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
+
+        if len(self.data) == 0:
+            # ValueError matches load_data's handling of bad input and is still caught by `except Exception`
+            raise ValueError("No data to process - check filepath/ data being passed in")
+
+        self.confine_data(ignore_step=ignore_step)
+        self.remap_anomalies()
+        self.remap_floor_level(ignore_step=ignore_step)
+        self.remap_build_form()
+        self.cast_data_column_values_to_lower()
+        self.standardise_construction_age_band(ignore_step=ignore_step)
+        self.clean_missing_rooms(ignore_step=ignore_step)
+        self.recast_df_columns(
+            column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
+        )
+        self.clean_multi_glaze_proportion(ignore_step=ignore_step)
+        self.clean_photo_supply()
+        self.retain_multiple_epc_properties(
+            epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"], ignore_step=ignore_step
+        )
+
+        self.fill_na_fields()
+
+        self.sort_data_by_uprn_lodgement_date(ignore_step=ignore_step)
+
+        # Final re-casting after data transformed and prepared
+        self.recast_df_columns(column_mappings=COLUMNTYPES, auto_subset_columns=True)
+        self.recast_all_data(column_mappings=COLUMNTYPES, auto_subset_columns=True)
+        self.na_remapping(auto_subset_columns=True)
+
+        self.fill_invalid_constituency_fields(ignore_step=ignore_step)
+
+        # In training mode the averages are built from the data; in newdata mode the averages supplied
+        # at construction time are used as they are
+        self.make_cleaning_averages(ignore_step=ignore_step)
+        self.add_local_authority_to_cleaning_average(ignore_step=ignore_step)
+
+        # TODO: check if this has impact on training dataset
+        # cleaned_data = self.apply_averages_cleaning(
+        #     data_to_clean=self.data,
+        #     cleaning_data=self.cleaning_averages,
+        #     cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        #     colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+        # )
+
+        # When running in newdata mode, cleaning_averages has lower cases so we co-erce back to upper.
+        # A copy is used so the averages held on the instance keep their original column names.
+        cleaning_averages = self.cleaning_averages.copy()
+        if self.run_mode == "newdata":
+            cleaning_averages.columns = cleaning_averages.columns.str.upper()
+
+        cleaned_data = self.apply_averages_cleaning(
+            data_to_clean=self.data,
+            cleaning_data=cleaning_averages,
+            cols_to_merge_on=COLUMNS_TO_MERGE_ON,
+        )
+
+        # Keep the existing data if the cleaning step returned nothing
+        self.data = self.data if cleaned_data is None else cleaned_data
+
+        self.cast_cleaning_averages_columns_to_lower(ignore_step=ignore_step)
+        self.cast_data_columns_to_lower()
+
+    def cast_data_columns_to_lower(self):
+        """
+        Lower-case every column name of self.data in place
+        """
+        lowered_names = self.data.columns.str.lower()
+        self.data.columns = lowered_names
+
+    def cast_cleaning_averages_columns_to_lower(self, ignore_step: bool = False):
+        """
+        Lower-case every column name of self.cleaning_averages in place.
+        Does nothing when ignore_step is set (newdata mode).
+        """
+
+        if not ignore_step:
+            lowered_names = self.cleaning_averages.columns.str.lower()
+            self.cleaning_averages.columns = lowered_names
+
+    def add_local_authority_to_cleaning_average(self, ignore_step: bool = False):
+        """
+        Stamp every row of self.cleaning_averages with the LOCAL_AUTHORITY of the first row of self.data.
+        Does nothing when ignore_step is set (newdata mode).
+        """
+
+        if not ignore_step:
+            first_local_authority = self.data["LOCAL_AUTHORITY"].values[0]
+            self.cleaning_averages["LOCAL_AUTHORITY"] = first_local_authority
+
+    def fill_invalid_constituency_fields(self, ignore_step: bool = False):
+        """
+        Fill missing CONSTITUENCY values with the most common CONSTITUENCY in the data.
+
+        Does nothing in violation mode or when ignore_step is set. If every CONSTITUENCY value is missing
+        there is no mode to fill with, so the data is left unchanged.
+
+        :param ignore_step: When True the step is skipped (newdata mode)
+        """
+        if self.violation_mode:
+            # TODO: to fill in
+            return
+
+        if ignore_step:
+            return
+
+        # mode() skips missing values, so it is empty when the whole column is missing
+        constituency_modes = self.data["CONSTITUENCY"].mode()
+        if constituency_modes.empty:
+            return
+
+        self.data = self.data.fillna({"CONSTITUENCY": constituency_modes.values[0]})
+
+    def sort_data_by_uprn_lodgement_date(self, ignore_step: bool = False):
+        """
+        Order self.data by UPRN and then lodgement date, both ascending.
+        Does nothing when ignore_step is set; violation mode is not consulted.
+        """
+
+        if not ignore_step:
+            sort_keys = ["UPRN", "LODGEMENT_DATE"]
+            self.data = self.data.sort_values(sort_keys, ascending=True)
+
+    def cast_data_column_values_to_lower(self):
+        """
+        Lower-case the string values of the listed columns of self.data.
+        Runs in every mode; neither violation mode nor the run mode is consulted.
+        """
+        for column_name in ("TRANSACTION_TYPE",):
+            lowered_values = self.data[column_name].str.lower()
+            self.data[column_name] = lowered_values
+
+    def remap_build_form(self):
+        """
+        Replace BUILT_FORM values using the BUILT_FORM_REMAP lookup.
+        Runs in every mode; neither violation mode nor the run mode is consulted.
+        """
+        remapped_built_form = self.data["BUILT_FORM"].replace(BUILT_FORM_REMAP)
+        self.data["BUILT_FORM"] = remapped_built_form
+
+    def remap_anomalies(self):
+        """
+        Replace every value listed in Definitions.DATA_ANOMALY_MATCHES, and every NaN, with None.
+        Runs in every mode; neither violation mode nor the run mode is consulted.
+        """
+
+        # Map all anomaly values to None
+        data_anomaly_map = dict.fromkeys(Definitions.DATA_ANOMALY_MATCHES)
+
+        # Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
+        data = self.data.replace(data_anomaly_map)
+        # np.nan rather than np.NAN: the upper-case alias was removed in NumPy 2.0
+        data = data.replace(np.nan, None)
+
+        self.data = data
+
+    def remap_floor_level(self, ignore_step: bool = False):
+        """
+        Replace FLOOR_LEVEL values using the FLOOR_LEVEL_MAP lookup.
+        Does nothing in violation mode or when ignore_step is set.
+        """
+
+        # TODO: We need to handle the violation mode case
+        if self.violation_mode or ignore_step:
+            return
+
+        remapped_floor_level = self.data["FLOOR_LEVEL"].replace(FLOOR_LEVEL_MAP)
+        self.data["FLOOR_LEVEL"] = remapped_floor_level
+
+    def load_data(self, filepath, low_memory=False) -> None:
+        if not filepath:
            raise ValueError("No filepath specified")
-        self.data = pd.read_csv(self.filepath, low_memory=low_memory)
+        self.data = pd.read_csv(filepath, low_memory=low_memory)

    def insert_data(self, data: pd.DataFrame) -> None:
        self.data = data
@ -90,11 +279,11 @@ class DataProcessor:
            return x

        # Next, we check if it's a value in our map
-        if bounds_map.get(x):
+        if construction_age_bounds_map.get(x):
            return x

        # We check if it's a standard remap value
-        remap_value = remap.get(x, None)
+        remap_value = construction_age_remap.get(x, None)
        if remap_value:
            return remap_value

@ -105,12 +294,19 @@ class DataProcessor:

        raise NotImplementedError("Not handled the case for value %s" % x)

-    def standardise_construction_age_band(self):
+    def standardise_construction_age_band(self, ignore_step: bool = False):
        """
        This function will tidy up some of the non-standard values that are populated in the construction age
        band, which is useful for cleaning
        """

+        if self.violation_mode:
+            # TODO: to fill in 
+            return
+
+        if ignore_step:
+            return
+
        self.data["CONSTRUCTION_AGE_BAND"] = self.data["CONSTRUCTION_AGE_BAND"].apply(
            lambda x: self.clean_construction_age_band(x)
        )
@ -119,7 +315,7 @@ class DataProcessor:
            ~pd.isnull(self.data["CONSTRUCTION_AGE_BAND"])
        ]

-    def clean_missing_rooms(self):
+    def clean_missing_rooms(self, ignore_step: bool = False):
        """
        For the number of heated rooms and number of habitable rooms, we clean these values up front,
        based on property archetype and age
@ -127,6 +323,14 @@ class DataProcessor:
        TODO: We could use a model based impution approach for possibly more accurate cleaning
        """

+        if self.violation_mode:
+            # TODO: to fill in
+            return
+
+        if ignore_step:
+            return
+
+        # TODO: DO we want to move this out of this function? (i.e. alter the data before we do any cleaning)
        self.data["POSTAL_AREA"] = self.data["POSTCODE"].apply(lambda x: x.split(" ")[0])

        def apply_clean(data, matching_columns):
@ -164,59 +368,78 @@ class DataProcessor:
                    break
                to_index -= 1

-    def pre_process(self) -> pd.DataFrame:
-        """
-        Load data and begin initial cleaning
-        """
-        if self.data is None:
-            self.load_data(low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
+    # def pre_process(self, filepath: Path | None = None) -> tuple[pd.DataFrame, pd.DataFrame]:
+    #     """
+    #     Load data and begin initial cleaning
+    #     """
+    #     if self.data is None:
+    #         self.load_data(filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])

-        if not self.newdata:
-            self.confine_data()
+    #     if not self.is_newdata:
+    #         self.confine_data()

-        self.remap_columns()
+    #     self.remap_columns()

-        # We have some non-standard construction age bands which we'll clean for matching
-        if not self.newdata:
-            self.standardise_construction_age_band()
-            self.clean_missing_rooms()
+    #     # We have some non-standard construction age bands which we'll clean for matching
+    #     if not self.is_newdata:
+    #         self.standardise_construction_age_band()
+    #         self.clean_missing_rooms()

-        self.recast_df_columns(
-            column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
-        )
+    #     self.recast_df_columns(
+    #         column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
+    #     )

-        if not self.newdata:
-            self.clean_multi_glaze_proportion()
+    #     if not self.is_newdata:
+    #         self.clean_multi_glaze_proportion()

-        self.clean_photo_supply()
+    #     self.clean_photo_supply()

-        if not self.newdata:
-            self.retain_multiple_epc_properties(
-                epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"]
-            )
+    #     if not self.is_newdata:
+    #         self.retain_multiple_epc_properties(
+    #             epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"]
+    #         )

-        if DATA_PROCESSOR_SETTINGS["epc_minimum_count"] >= 1:
-            # If we have multiple EPC records, we can try and do filling
-            self.fill_na_fields()
+    #     if DATA_PROCESSOR_SETTINGS["epc_minimum_count"] >= 1:
+    #         # If we have multiple EPC records, we can try and do filling
+    #         self.fill_na_fields()

-        if not self.newdata:
-            self.data = self.data.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)
+    #     if not self.is_newdata:
+    #         self.data = self.data.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)

-        # Final re-casting after data transformed and prepared
-        coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.newdata else COLUMNTYPES
-        for k, v in coltypes.items():
-            self.data[k] = self.data[k].astype(v)
-        self.data = self.data.astype(coltypes)
+    #     # Final re-casting after data transformed and prepared
+    #     coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.is_newdata else
+    #     COLUMNTYPES
+    #     for k, v in coltypes.items():
+    #         self.data[k] = self.data[k].astype(v) 
+    #     self.data = self.data.astype(coltypes)

-        self.na_remapping()
+    #     self.na_remapping()

-        return self.data
+    #     self.cleaning_averages = None
+    #     if not self.is_newdata:
+    #         # We have some odd cases with missing constituency so we fill
+    #         self.data = self.data.fillna({"CONSTITUENCY": self.data["CONSTITUENCY"].mode().values[0]})

-    def na_remapping(self):
+    #         self.cleaning_averages = self.make_cleaning_averages()
+    #         # We apply averages cleaning to the data
+    #         self.data = self.apply_averages_cleaning(
+    #             data_to_clean=self.data,
+    #             cleaning_data=self.cleaning_averages,
+    #             cols_to_merge_on=COLUMNS_TO_MERGE_ON
+    #         )
+
+    #         self.cleaning_averages["LOCAL_AUTHORITY"] = self.data["LOCAL_AUTHORITY"].values[0]
+    #         self.cleaning_averages.columns = self.cleaning_averages.columns.str.lower()
+
+    #     self.data.columns = self.data.columns.str.lower()
+
+    #     return self.data, self.cleaning_averages
+
+    def na_remapping(self, auto_subset_columns: bool = False):

        fill_na_map_apply = {
            k: v for k, v in fill_na_map.items() if k in self.data.columns
-        } if self.newdata else fill_na_map
+        } if auto_subset_columns else fill_na_map

        for column, fill_value in fill_na_map_apply.items():
            self.data[column] = self.data[column].fillna(fill_value)
@ -243,35 +466,15 @@ class DataProcessor:
            ["FLOOR_HEIGHT", "TOTAL_FLOOR_AREA"]
        ].replace("", None)

-    def remap_columns(self):
+    def make_cleaning_averages(self, ignore_step: bool = False) -> pd.DataFrame:
        """
-        Remap all columns, for any non values
+        Create a dataset to hold averages based on property type, built form, construction age, and rooms.
+        Not required in newdata mode
        """

-        # Map all anomaly values to None
-        data_anomaly_map = dict(
-            zip(
-                Definitions.DATA_ANOMALY_MATCHES,
-                [None] * len(Definitions.DATA_ANOMALY_MATCHES),
-            )
-        )
+        if ignore_step:
+            return pd.DataFrame()

-        # Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
-        data = self.data.replace(data_anomaly_map)
-        data = data.replace(np.NAN, None)
-
-        # Remap certain columns
-        if not self.newdata:
-            data["FLOOR_LEVEL"] = data["FLOOR_LEVEL"].replace(FLOOR_LEVEL_MAP)
-        data["BUILT_FORM"] = data["BUILT_FORM"].replace(BUILT_FORM_REMAP)
-
-        convert_to_lower = ["TRANSACTION_TYPE"]
-        for col in convert_to_lower:
-            data[col] = data[col].str.lower()
-
-        self.data = data
-
-    def make_cleaning_averages(self) -> pd.DataFrame:
        # Define a custom function to calculate the median, excluding missing values
        def median_without_missing(group):
            return group[AVERAGE_FIXED_FEATURES].median(skipna=True)
@ -368,13 +571,20 @@ class DataProcessor:
        #     "FLOOR_HEIGHT"
        # ].fillna(FLOOR_HEIGHT_NATIONAL_AVERAGE)

-        return cleaning_averages_filled
+        self.cleaning_averages = cleaning_averages_filled

-    def retain_multiple_epc_properties(self, epc_minimum_count: int = 1) -> None:
+    def retain_multiple_epc_properties(self, epc_minimum_count: int = 1, ignore_step: bool = False) -> None:
        """
        Reduce the data futher by keeping only datasets with multiple epcs
        """

+        if self.violation_mode:
+            # TODO: to fill in
+            return
+
+        if ignore_step:
+            return
+
        counts = self.data.groupby("UPRN").size().reset_index()
        counts.columns = ["UPRN", "count"]

@ -382,22 +592,81 @@ class DataProcessor:
        counts = counts[counts["count"] > epc_minimum_count]
        self.data = pd.merge(self.data, counts, on="UPRN")

-    def recast_df_columns(self, column_mappings: dict) -> None:
+    def recast_df_columns(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
        """
        Recast columns from the dataframe to ensure the behaviour we want
        """
+        if auto_subset_columns:
+            column_mappings = {k: v for k, v in column_mappings.items() if k in self.data.columns}

        for key, values in column_mappings.items():
            if key not in self.data.columns:
                raise ValueError("Column mapping incorrectly specified")
-            for value in values:
-                self.data[key] = self.data[key].astype(value)
+            if isinstance(values, list):
+                for value in values:
+                    self.data[key] = self.data[key].astype(value)
+            else:
+                self.data[key] = self.data[key].astype(values)

-    def confine_data(self) -> None:
+    def recast_all_data(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
+        """
+        Cast the columns of self.data in a single astype call, using a column -> dtype dictionary.
+        With auto_subset_columns set, entries for columns that are not in the data are dropped first.
+        """
+
+        dtypes_to_apply = column_mappings
+        if auto_subset_columns:
+            dtypes_to_apply = {
+                column: dtype for column, dtype in column_mappings.items() if column in self.data.columns
+            }
+
+        self.data = self.data.astype(dtypes_to_apply)
+
+    def confine_data(self, ignore_step: bool = False):
        """
        Include all step to reduce down the data based on assumptions
        """

+        if self.violation_mode:
+            violation_uprn_missing = pd.isnull(self.data["UPRN"])
+            violation_old_lodgment_date = self.data["LODGEMENT_DATE"] < EARLIEST_EPC_DATE
+            violation_invalid_transaction_type = self.data["TRANSACTION_TYPE"] == IGNORED_TRANSACTION_TYPES
+            violation_ignored_floor_level = self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
+            violation_rdsap_score_above_max = self.data[RDSAP_RESPONSE] > MAX_SAP_SCORE
+            violation_missing_windows_description = pd.isnull(self.data["WINDOWS_DESCRIPTION"])
+            violation_missing_hotwater_description = pd.isnull(self.data["HOTWATER_DESCRIPTION"])
+            violation_missing_roof_description = pd.isnull(self.data["ROOF_DESCRIPTION"])
+            violation_invalid_property_type = self.data["PROPERTY_TYPE"] == IGNORED_PROPERTY_TYPES
+            violation_invalid_tenure = self.data["TENURE"].isin(IGNORED_TENURES)
+
+            violation_df = pd.concat(
+                [
+                    violation_uprn_missing,
+                    violation_old_lodgment_date,
+                    violation_invalid_transaction_type,
+                    violation_ignored_floor_level,
+                    violation_rdsap_score_above_max,
+                    violation_missing_windows_description,
+                    violation_missing_hotwater_description,
+                    violation_missing_roof_description,
+                    violation_invalid_property_type,
+                    violation_invalid_tenure,
+                ], axis=1,
+                keys=[
+                    "violation_uprn_missing",
+                    "violation_old_lodgment_date",
+                    "violation_invalid_transaction_type",
+                    "violation_ignored_floor_level",
+                    "violation_rdsap_score_above_max",
+                    "violation_missing_windows_description",
+                    "violation_missing_hotwater_description",
+                    "violation_missing_roof_description",
+                    "violation_invalid_property_type",
+                    "violation_invalid_tenure"
+                ]
+            )
+
+            self.data = pd.concat([self.data, violation_df], axis=1)
+
+        if ignore_step:
+            return
+
        # Filter 1: UPRN is a unique identifier for a property, so we remove any EPCs that don't have one

        # Filter 2: Lodgement date is the date the EPC was lodged, so we remove any EPCs that were lodged
@ -416,9 +685,9 @@ class DataProcessor:

        self.data = self.data[~pd.isnull(self.data["UPRN"])]
        self.data = self.data[self.data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
-        self.data = self.data[self.data["TRANSACTION_TYPE"] != "new dwelling"]
+        self.data = self.data[self.data["TRANSACTION_TYPE"] != IGNORED_TRANSACTION_TYPES]
        self.data = self.data[
-            ~self.data["FLOOR_LEVEL"].isin(["top floor", "mid floor"])
+            ~self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
        ]
        self.data = self.data[self.data[RDSAP_RESPONSE] <= MAX_SAP_SCORE]

@ -430,16 +699,30 @@ class DataProcessor:
        # Because park homes are surveyed unusually (for example, we don't have u-values to
        # look up for their different components, they need to be collected in survey and aren't reflected in
        # EPCs) we'll ignore them from the model
-        self.data = self.data[self.data["PROPERTY_TYPE"] != "Park home"]
+        self.data = self.data[self.data["PROPERTY_TYPE"] != IGNORED_PROPERTY_TYPES]

-    def clean_multi_glaze_proportion(self) -> None:
+        # We remove EPCs where the tenure is unknown, but is usually an indicator of a new build
+        self.data = self.data[~self.data["TENURE"].isin(IGNORED_TENURES)]
+
+        # We remap zero values to None
+        self.data.loc[self.data['FLOOR_HEIGHT'] == 0, 'FLOOR_HEIGHT'] = None
+
+    def clean_multi_glaze_proportion(self, ignore_step: bool = False) -> None:
        """
        If there is no multi-glaze proportion but the windows are fully glazed, then we should assume a score of 100
        """

+        if self.violation_mode:
+            # TODO: no violation check is implemented for this step yet, so in violation mode the
+            #       method returns without touching the data
+            return
+
+        if ignore_step:
+            # The caller asked for this cleaning step to be skipped
+            return
+
        no_multi_glaze_proportion_index = pd.isnull(
            self.data["MULTI_GLAZE_PROPORTION"]
        ) & (self.data["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS))
+
        self.data.loc[no_multi_glaze_proportion_index, "MULTI_GLAZE_PROPORTION"] = 100

    def clean_photo_supply(self) -> None:
@ -450,7 +733,9 @@ class DataProcessor:
        self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)

    @staticmethod
-    def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on, colnames=None):
+    def apply_averages_cleaning(
+        data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False
+    ):
        """
        Clean the input DataFrame using averages from a cleaning DataFrame.

@ -462,6 +747,9 @@ class DataProcessor:
        :return: Cleaned DataFrame.
        """

+        if ignore_step:
+            return None
+
        # The desired colnames to clean - which may not be present
        if colnames is None:
            colnames = ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"]
@ -492,12 +780,16 @@ class DataProcessor:
            how='left'
        )

+        global_averages = cleaning_data[cols_to_clean].mean()
+
        # Fill NaN values with averages
        for col in cols_to_clean:
            data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
            data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
            # If we still have missings
            data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
+            # Final step if we still have missings - use global mean
+            data_to_clean[col].fillna(global_averages[col], inplace=True)

        return data_to_clean

@ -510,8 +802,8 @@ class DataProcessor:
        :return: Pandas dataframe containing the subset of columns defined in COMPONENT_FEATURES
        """

-        if suffix not in ["_STARTING", "_ENDING"]:
-            raise Exception("Suffix should be one of _STARTING or _ENDING")
+        if suffix not in ["_starting", "_ending"]:
+            raise Exception("Suffix should be one of _starting or _ending")

        if suffix == "_STARTING":
            starting_cols = self.data[STARTING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES].copy().add_suffix(suffix)
@ -573,6 +865,7 @@ class DataProcessor:

        for col in missings.index:
            unique_values = df[col].unique()
+            # TODO: confirm this behaviour
            if True in unique_values or False in unique_values:
                df[col] = df[col].fillna(False)
            if "none" in unique_values:
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@ -0,0 +1,836 @@
+import numpy as np
+import pandas as pd
+from typing import List
+from etl.epc.Record import EPCDifferenceRecord
+from etl.epc.ValidationConfiguration import DatasetValidationConfiguration
+from etl.epc.settings import EARLIEST_EPC_DATE
+
+from recommendations.rdsap_tables import england_wales_age_band_lookup
+from recommendations.recommendation_utils import (
+    estimate_number_of_floors,
+    get_wall_u_value,
+    get_roof_u_value,
+    get_floor_u_value,
+    estimate_perimeter,
+    get_wall_type,
+)
+
+# TODO: Can probably produce this in the property change app and store in S3
+BOOLEAN_VARIABLES = [
+    "is_cavity_wall",
+    "is_filled_cavity",
+    "is_solid_brick",
+    "is_system_built",
+    "is_timber_frame",
+    "is_granite_or_whinstone",
+    "is_as_built",
+    "is_cob",
+    "is_sandstone_or_limestone",
+    "is_park_home",
+    "external_insulation",
+    "internal_insulation",
+    "is_park_home_ending",
+    "external_insulation_ending",
+    "internal_insulation_ending",
+    "is_to_unheated_space",
+    "is_to_external_air",
+    "is_suspended",
+    "is_solid",
+    "another_property_below",
+    "is_pitched",
+    "is_roof_room",
+    "is_loft",
+    "is_flat",
+    "is_thatched",
+    "is_at_rafters",
+    "has_dwelling_above",
+    "has_radiators",
+    "has_fan_coil_units",
+    "has_pipes_in_screed_above_insulation",
+    "has_pipes_in_insulated_timber_floor",
+    "has_pipes_in_concrete_slab",
+    "has_boiler",
+    "has_air_source_heat_pump",
+    "has_room_heaters",
+    "has_electric_storage_heaters",
+    "has_warm_air",
+    "has_electric_underfloor_heating",
+    "has_electric_ceiling_heating",
+    "has_community_scheme",
+    "has_ground_source_heat_pump",
+    "has_no_system_present",
+    "has_portable_electric_heaters",
+    "has_water_source_heat_pump",
+    "has_electric_heat_pump",
+    "has_micro-cogeneration",
+    "has_solar_assisted_heat_pump",
+    "has_exhaust_source_heat_pump",
+    "has_community_heat_pump",
+    "has_electric",
+    "has_mains_gas",
+    "has_wood_logs",
+    "has_coal",
+    "has_oil",
+    "has_wood_pellets",
+    "has_anthracite",
+    "has_dual_fuel_mineral_and_wood",
+    "has_smokeless_fuel",
+    "has_lpg",
+    "has_b30k",
+    "has_electricaire",
+    "has_assumed_for_most_rooms",
+    "has_underfloor_heating",
+    "has_radiators_ending",
+    "has_fan_coil_units_ending",
+    "has_pipes_in_screed_above_insulation_ending",
+    "has_pipes_in_insulated_timber_floor_ending",
+    "has_pipes_in_concrete_slab_ending",
+    "has_boiler_ending",
+    "has_air_source_heat_pump_ending",
+    "has_room_heaters_ending",
+    "has_electric_storage_heaters_ending",
+    "has_warm_air_ending",
+    "has_electric_underfloor_heating_ending",
+    "has_electric_ceiling_heating_ending",
+    "has_community_scheme_ending",
+    "has_ground_source_heat_pump_ending",
+    "has_no_system_present_ending",
+    "has_portable_electric_heaters_ending",
+    "has_water_source_heat_pump_ending",
+    "has_electric_heat_pump_ending",
+    "has_micro-cogeneration_ending",
+    "has_solar_assisted_heat_pump_ending",
+    "has_exhaust_source_heat_pump_ending",
+    "has_community_heat_pump_ending",
+    "has_electric_ending",
+    "has_mains_gas_ending",
+    "has_wood_logs_ending",
+    "has_coal_ending",
+    "has_oil_ending",
+    "has_wood_pellets_ending",
+    "has_anthracite_ending",
+    "has_dual_fuel_mineral_and_wood_ending",
+    "has_smokeless_fuel_ending",
+    "has_lpg_ending",
+    "has_b30k_ending",
+    "has_electricaire_ending",
+    "has_assumed_for_most_rooms_ending",
+    "has_underfloor_heating_ending",
+    "multiple_room_thermostats",
+    "multiple_room_thermostats_ending",
+    "is_community",
+    "no_individual_heating_or_community_network",
+    "is_community_ending",
+    "no_individual_heating_or_community_network_ending",
+]
+
+
+class BaseDataset:
+    """
+    Base class for all datasets.
+
+    Holds a mapping of pipeline steps and, once validate_dataset has been called, the dataset
+    validation configuration.
+    """
+
+    def __init__(self) -> None:
+        # Starts empty; nothing in this class populates it (the factory that would read from it is
+        # commented out below)
+        self.pipeline_steps = {}
+
+    def validate_dataset(self):
+        """
+        Validate the dataset against the validation configuration
+
+        NOTE(review): at the moment this only stores DatasetValidationConfiguration on the instance, no
+        checks are run here. The attribute is annotated as dict - confirm that this matches what
+        etl.epc.ValidationConfiguration exports.
+        """
+        self.dataset_validation: dict = DatasetValidationConfiguration
+
+    # def pipeline_factory(self, pipeline_type: str) -> dict:
+    #     """
+    #     Factory method for creating a pipeline
+    #     """
+    #     if pipeline_type not in self.pipeline_steps:
+    #         raise ValueError(f"Pipeline type {pipeline_type} not found")
+
+    #     return self.pipeline_steps[pipeline_type]
+
+
+class TrainingDataset(BaseDataset):
+    """
+    A collection of EPCDifferenceRecords can be combined into a TrainingDataset.
+    """
+
+    def __init__(
+        self, datasets: List[EPCDifferenceRecord], cleaned_lookup: dict
+    ) -> None:
+        """
+        Build the training dataframe from the difference records and run the cleaning steps in order.
+
+        :param datasets: The EPCDifferenceRecords to combine; each record's difference_record becomes one row
+        :param cleaned_lookup: Cleaned description lookup, passed to _expand_description_to_features
+        :raises ValueError: From _null_validation if nulls remain after the efficiency cleaning or the
+            missing value cleaning
+        """
+        # The base initialiser creates pipeline_steps; without this call the attribute does not exist
+        # on TrainingDataset instances
+        super().__init__()
+        # self.pipeline_steps = self.pipeline_factory("training")
+        self.datasets = datasets
+        self.df = pd.DataFrame([dataset.difference_record for dataset in datasets])
+
+        # The order matters: the days_to_* features are derived from the lodgement dates before those
+        # dates are dropped, and u-values are generated from the columns created by the description expansion
+        self._feature_generation()
+        self._drop_features()
+        self._clean_efficiency_variables()
+        self._null_validation(information="Clean Efficiency Variables")
+        self._expand_description_to_features(cleaned_lookup)
+        self._adjust_assumed_values_in_wall_descriptions()
+        self._generate_u_values_from_features()
+        # TODO: For some of the features that we clean, we have either a true, false or possibly null value
+        #       Those nulls should be False. clean_missings_after_description_process handles this but shouldn't
+        #       need to
+        self._clean_missing_values()
+        self._null_validation(information="Clean Missing Values")
+        self._remove_abnormal_change_in_floor_area()
+        self._ensure_numeric()
+        self._organise_starting_ending_columns()
+
+    def _organise_starting_ending_columns(self):
+        """
+        Reorder the columns so that each starting column sits directly next to its ending counterpart.
+
+        The resulting order is: columns with neither suffix, then ending columns without a starting
+        counterpart, then the (starting, ending) pairs. A starting column without an ending counterpart
+        is not part of the selection and is therefore left out of the dataframe.
+        """
+        all_columns = list(self.df.columns)
+
+        # Suffixes are detected by substring, not by position within the name
+        ending_cols = [name for name in all_columns if "_ending" in name]
+        starting_cols = [name for name in all_columns if "_starting" in name]
+        unsuffixed_cols = [
+            name
+            for name in all_columns
+            if "_starting" not in name and "_ending" not in name
+        ]
+
+        ending_lookup = set(ending_cols)
+        starting_lookup = set(starting_cols)
+
+        paired_cols = []
+        for starting_col in starting_cols:
+            if starting_col.replace("_starting", "_ending") in ending_lookup:
+                stem = starting_col.rsplit("_", 1)[0]
+                paired_cols.extend([stem + "_starting", stem + "_ending"])
+
+        ending_only_cols = [
+            name
+            for name in ending_cols
+            if name.replace("_ending", "_starting") not in starting_lookup
+        ]
+
+        self.df = self.df.loc[:, unsuffixed_cols + ending_only_cols + paired_cols]
+
+    def _remove_abnormal_change_in_floor_area(self, max_relative_change: float = 0.5):
+        """
+        Remove properties whose total floor area changed too much between the starting and ending EPC.
+
+        The change is measured as abs(ending - starting) / starting, and only rows where it is strictly
+        below max_relative_change are kept. With the default this drops changes of 50% or more.
+        Rows with a starting floor area of zero produce an infinite or undefined ratio and are dropped too.
+
+        :param max_relative_change: Exclusive upper bound on the relative change in total floor area
+        """
+        starting_area = self.df["total_floor_area_starting"]
+        absolute_change = (self.df["total_floor_area_ending"] - starting_area).abs()
+        relative_change = absolute_change / starting_area
+
+        # Work on local series rather than writing temporary columns onto the dataframe, and take a copy
+        # so that later column assignments operate on an independent frame
+        self.df = self.df[relative_change < max_relative_change].copy()
+
+    def _ensure_numeric(self):
+        """
+        Convert the u-value columns to a numeric dtype.
+
+        Only columns with "thermal_transmittance" in their name are converted, every other column is
+        left as it is. pd.to_numeric is called with its defaults.
+        """
+        # TODO: move into EPCRecord record
+        for column in list(self.df.columns):
+            if "thermal_transmittance" in column:
+                self.df[column] = pd.to_numeric(self.df[column])
+
+    @staticmethod
+    def _lambda_function_to_generate_roof_uvalue(row, is_end=False):
+        """
+        Row-wise helper for DataFrame.apply which derives a roof u-value using get_roof_u_value.
+
+        :param row: A single row of the dataset
+        :param is_end: When True the ending insulation thickness is used, otherwise the starting one.
+            All other roof features are read from the un-suffixed columns in both cases
+        :return: Whatever get_roof_u_value returns for the row
+        :raises ValueError: If the row has a dwelling above and either the starting or the ending roof
+            u-value is not 0
+        """
+        thickness_col = (
+            "roof_insulation_thickness_ending" if is_end else "roof_insulation_thickness"
+        )
+
+        if row["has_dwelling_above"]:
+            # Both u-values are checked, regardless of which one is being generated
+            for uvalue_col in (
+                "roof_thermal_transmittance",
+                "roof_thermal_transmittance_ending",
+            ):
+                if row[uvalue_col] != 0:
+                    raise ValueError("Should have 0 u-value for roof")
+
+        return get_roof_u_value(
+            insulation_thickness=row[thickness_col],
+            has_dwelling_above=row["has_dwelling_above"],
+            is_loft=row["is_loft"],
+            is_roof_room=row["is_roof_room"],
+            is_thatched=row["is_thatched"],
+            is_flat=row["is_flat"],
+            is_pitched=row["is_pitched"],
+            is_at_rafters=row["is_at_rafters"],
+            age_band=england_wales_age_band_lookup[row["construction_age_band"]],
+        )
+
+    @staticmethod
+    def _lambda_function_to_generate_wall_uvalue(row, is_end=False):
+        """
+        Row-wise helper for DataFrame.apply which returns the wall u-value for a row.
+
+        A u-value that is already present on the row is returned unchanged. Only when it is null is one
+        derived with get_wall_u_value, from the cleaned wall description and the construction age band.
+
+        :param row: A single row of the dataset
+        :param is_end: When True the ending description and u-value columns are used
+        :return: The existing or derived wall u-value
+        """
+        suffix = "_ending" if is_end else ""
+        existing_uvalue = row[f"walls_thermal_transmittance{suffix}"]
+
+        if not pd.isnull(existing_uvalue):
+            return existing_uvalue
+
+        return get_wall_u_value(
+            clean_description=row[f"walls_clean_description{suffix}"],
+            age_band=england_wales_age_band_lookup[row["construction_age_band"]],
+            is_granite_or_whinstone=row["is_granite_or_whinstone"],
+            is_sandstone_or_limestone=row["is_sandstone_or_limestone"],
+        )
+
+    @staticmethod
+    def _lambda_function_to_generate_floor_uvalue(row, is_end=False):
+        """
+        Row-wise helper for DataFrame.apply which returns the floor u-value for a row.
+
+        Rows with another property below return 0. Otherwise a u-value that is already present is
+        returned unchanged, and a null one is derived with get_floor_u_value.
+
+        :param row: A single row of the dataset
+        :param is_end: When True the ending u-value, insulation thickness, perimeter and ground floor
+            area columns are used, otherwise the starting ones
+        :return: The existing or derived floor u-value
+        :raises ValueError: If there is another property below and either the starting or the ending
+            floor u-value is not 0
+        """
+        if row["another_property_below"]:
+            # Both u-values are checked, regardless of which one is being generated
+            for known_uvalue_col in (
+                "floor_thermal_transmittance",
+                "floor_thermal_transmittance_ending",
+            ):
+                if row[known_uvalue_col] != 0:
+                    raise ValueError("Should have 0 u-value for floor")
+            return 0
+
+        if is_end:
+            uvalue_col = "floor_thermal_transmittance_ending"
+            thickness_col = "floor_insulation_thickness_ending"
+            perimeter_col = "estimated_perimeter_ending"
+            area_col = "ground_floor_area_ending"
+        else:
+            uvalue_col = "floor_thermal_transmittance"
+            thickness_col = "floor_insulation_thickness"
+            perimeter_col = "estimated_perimeter_starting"
+            area_col = "ground_floor_area_starting"
+
+        recorded_uvalue = row[uvalue_col]
+        if not pd.isnull(recorded_uvalue):
+            return recorded_uvalue
+
+        return get_floor_u_value(
+            floor_type=row["floor_type"],
+            perimeter=row[perimeter_col],
+            area=row[area_col],
+            insulation_thickness=row[thickness_col],
+            wall_type=row["wall_type"],
+            age_band=england_wales_age_band_lookup[row["construction_age_band"]],
+        )
+
+    def _generate_u_values_from_features(self):
+        """
+        Fill in missing wall, roof and floor u-values (thermal transmittance) for the starting and ending EPC.
+
+        A u-value that is already present is kept, only nulls are replaced with a value derived from the
+        expanded description features. The helper columns created along the way (floor type, wall type,
+        cleaned wall descriptions, estimated number of floors and ground floor areas) are dropped at the
+        end; the estimated perimeter columns are left on the dataframe.
+        """
+
+        # ~~~~~~~~~~~~~~~~~~
+        # Walls
+        # ~~~~~~~~~~~~~~~~~~
+
+        walls_starting_uvalue = self.df.apply(
+            lambda row: self._lambda_function_to_generate_wall_uvalue(row), axis=1
+        )
+        walls_ending_uvalue = self.df.apply(
+            lambda row: self._lambda_function_to_generate_wall_uvalue(row, is_end=True),
+            axis=1,
+        )
+
+        walls_starting_uvalue = self.df["walls_thermal_transmittance"].fillna(
+            walls_starting_uvalue
+        )
+        # When the wall description has not changed between the two EPCs, the ending u-value is taken
+        # from the starting one rather than being derived separately
+        walls_starting_equals_ending_flag = (
+            self.df["walls_clean_description"]
+            == self.df["walls_clean_description_ending"]
+        )
+        walls_ending_uvalue[walls_starting_equals_ending_flag] = walls_starting_uvalue[
+            walls_starting_equals_ending_flag
+        ]
+
+        # ~~~~~~~~~~~~~~~~~~
+        # Roof
+        # ~~~~~~~~~~~~~~~~~~
+
+        roof_starting_uvalue = self.df.apply(
+            lambda row: self._lambda_function_to_generate_roof_uvalue(row), axis=1
+        )
+        roof_ending_uvalue = self.df.apply(
+            lambda row: self._lambda_function_to_generate_roof_uvalue(row, is_end=True),
+            axis=1,
+        )
+
+        # ~~~~~~~~~~~~~~~~~~
+        # Floor
+        # ~~~~~~~~~~~~~~~~~~
+
+        self.df["estimated_number_of_floors"] = self.df.apply(
+            lambda row: estimate_number_of_floors(row["property_type"]), axis=1
+        )
+
+        self.df["ground_floor_area_starting"] = (
+            self.df["total_floor_area_starting"] / self.df["estimated_number_of_floors"]
+        )
+        self.df["ground_floor_area_ending"] = (
+            self.df["total_floor_area_ending"] / self.df["estimated_number_of_floors"]
+        )
+
+        self.df["estimated_perimeter_starting"] = self.df.apply(
+            lambda row: estimate_perimeter(
+                row["ground_floor_area_starting"],
+                row["number_habitable_rooms_starting"]
+                / row["estimated_number_of_floors"],
+            ),
+            axis=1,
+        )
+        # The ending perimeter has to be based on the ending ground floor area. It was previously
+        # computed from the starting area, which paired a starting perimeter with the ending area when
+        # deriving the ending floor u-value
+        self.df["estimated_perimeter_ending"] = self.df.apply(
+            lambda row: estimate_perimeter(
+                row["ground_floor_area_ending"],
+                row["number_habitable_rooms_ending"]
+                / row["estimated_number_of_floors"],
+            ),
+            axis=1,
+        )
+        self.df["floor_type"] = self.df["is_suspended"].replace(
+            {True: "suspended", False: "solid"}
+        )
+        self.df["wall_type"] = self.df.apply(
+            lambda row: get_wall_type(
+                is_cavity_wall=row["is_cavity_wall"],
+                is_solid_brick=row["is_solid_brick"],
+                is_timber_frame=row["is_timber_frame"],
+                is_granite_or_whinstone=row["is_granite_or_whinstone"],
+                is_cob=row["is_cob"],
+                is_sandstone_or_limestone=row["is_sandstone_or_limestone"],
+                is_system_built=row["is_system_built"],
+                is_park_home=row["is_park_home"],
+            ),
+            axis=1,
+        )
+
+        floor_starting_uvalue = self.df.apply(
+            lambda row: self._lambda_function_to_generate_floor_uvalue(row), axis=1
+        )
+        floor_ending_uvalue = self.df.apply(
+            lambda row: self._lambda_function_to_generate_floor_uvalue(
+                row, is_end=True
+            ),
+            axis=1,
+        )
+
+        # ~~~~~~~~~~~~~~~~~~
+        # Fill the missing u-values
+        # ~~~~~~~~~~~~~~~~~~
+
+        # Explicit mapping instead of looking the local variables up by name with eval()
+        generated_uvalues = {
+            "walls": (walls_starting_uvalue, walls_ending_uvalue),
+            "roof": (roof_starting_uvalue, roof_ending_uvalue),
+            "floor": (floor_starting_uvalue, floor_ending_uvalue),
+        }
+
+        for component, (starting_uvalue, ending_uvalue) in generated_uvalues.items():
+            starting_col = f"{component}_thermal_transmittance"
+            ending_col = f"{component}_thermal_transmittance_ending"
+            # fillna only touches nulls, so u-values that were already present are preserved
+            self.df[starting_col] = self.df[starting_col].fillna(starting_uvalue)
+            self.df[ending_col] = self.df[ending_col].fillna(ending_uvalue)
+
+        self.df = self.df.drop(
+            columns=[
+                "floor_type",
+                "wall_type",
+                "walls_clean_description",
+                "walls_clean_description_ending",
+                "estimated_number_of_floors",
+                "ground_floor_area_starting",
+                "ground_floor_area_ending",
+            ]
+        )
+
+    def _adjust_assumed_values_in_wall_descriptions(self):
+        """
+        Remove the literal text "(assumed)" from the starting and ending cleaned wall descriptions and
+        strip any trailing whitespace that is left behind
+        """
+        for description_col in (
+            "walls_clean_description",
+            "walls_clean_description_ending",
+        ):
+            # regex=False so that the brackets are matched literally
+            without_assumed = self.df[description_col].str.replace(
+                "(assumed)", "", regex=False
+            )
+            self.df[description_col] = without_assumed.str.rstrip()
+
+    def _drop_inconsistent_properties(self, expanded_df: pd.DataFrame, component: str):
+        """
+        Drop properties that have inconsistent data, i.e. where a material type flag differs between the
+        starting and the ending EPC.
+
+        :param expanded_df: Dataframe holding the flag columns together with their _ending counterparts
+        :param component: The component being processed. Only "walls", "floor" and "roof" are filtered,
+            for any other component the dataframe is returned untouched
+        :return: The rows where every listed flag equals its _ending counterpart
+        """
+        flags_that_must_match = {
+            "walls": [
+                "is_cavity_wall",
+                "is_solid_brick",
+                "is_timber_frame",
+                "is_granite_or_whinstone",
+                "is_cob",
+                "is_sandstone_or_limestone",
+            ],
+            "floor": [
+                "is_suspended",
+                "is_solid",
+                "another_property_below",
+                "is_to_unheated_space",
+                "is_to_external_air",
+            ],
+            "roof": [
+                "is_pitched",
+                "is_roof_room",
+                "is_loft",
+                "is_flat",
+                "is_thatched",
+                "is_at_rafters",
+                "has_dwelling_above",
+            ],
+        }
+
+        flags = flags_that_must_match.get(component)
+        if flags is None:
+            return expanded_df
+
+        # Equality is False when either side is missing, so rows with null flags are dropped as well
+        is_consistent = None
+        for flag in flags:
+            flag_unchanged = expanded_df[flag] == expanded_df[f"{flag}_ending"]
+            is_consistent = (
+                flag_unchanged if is_consistent is None else is_consistent & flag_unchanged
+            )
+
+        return expanded_df[is_consistent]
+
+    def _expand_description_to_features(self, cleaned_lookup: dict):
+        """
+        This method will merge on the cleaned lookup table and ensure that the building fabric in the
+        starting and ending EPC is consistent, to ensure that we are performing our modelling on the cleanest
+        possible dataset.
+
+        For each component, the starting and the ending description are each left-joined to that
+        component's cleaned lookup, matched on the lookup's original_description column. Lookup columns
+        that overlap in the second join get an _ending suffix.
+
+        We look for key building fabric features that have changed from one EPC to the next.
+        If, for example, we see that a home has gone from being a cavity wall to a solid wall, we
+        remove this record, as it indicates that the quality of the EPC conducted in the first instance
+        is low.
+        We also replace descriptions with their cleaned variants.
+
+        :param cleaned_lookup: Mapping of lookup key ("<component>-description", or "main-fuel") to the
+            cleaned records for that component. Each entry is passed to pd.DataFrame
+        """
+
+        # Columns from the cleaned lookup that are dropped again once the consistency check has been done
+        cols_to_drop = {
+            "walls": [
+                # We need the cleaned descriptions for pulling out u-values, so clean_description is
+                # deliberately not listed here
+                "original_description",
+                "thermal_transmittance_unit",
+                "original_description_ending",
+                "thermal_transmittance_unit_ending",
+                "is_cavity_wall_ending",
+                "is_solid_brick_ending",
+                "is_system_built_ending",
+                "is_timber_frame_ending",
+                "is_granite_or_whinstone_ending",
+                "is_as_built_ending",
+                "is_cob_ending",
+                "is_assumed_ending",
+                "is_sandstone_or_limestone_ending",
+                # We remove the is_assumed columns
+                "is_assumed",
+                "is_assumed_ending",
+            ],
+            "floor": [
+                "original_description",
+                "clean_description",
+                "thermal_transmittance_unit",
+                "no_data",
+                "no_data_ending",
+                "original_description_ending",
+                "clean_description_ending",
+                "thermal_transmittance_unit_ending",
+                "is_suspended_ending",
+                "is_solid_ending",
+                "another_property_below_ending",
+                "is_to_unheated_space_ending",
+                "is_to_external_air_ending",
+                "is_assumed",
+                "is_assumed_ending",
+            ],
+            "roof": [
+                "original_description",
+                "clean_description",
+                "thermal_transmittance_unit",
+                "is_assumed",
+                "is_valid",
+                "original_description_ending",
+                "clean_description_ending",
+                "thermal_transmittance_unit_ending",
+                "is_pitched_ending",
+                "is_roof_room_ending",
+                "is_loft_ending",
+                "is_flat_ending",
+                "is_thatched_ending",
+                "has_dwelling_above_ending",
+                "is_assumed_ending",
+                "is_valid_ending",
+            ],
+            "hotwater": [
+                "original_description",
+                "clean_description",
+                "assumed",
+                "original_description_ending",
+                "clean_description_ending",
+                "assumed_ending",
+            ],
+            "mainheat": [
+                "original_description",
+                "clean_description",
+                "original_description_ending",
+                "has_assumed",
+                "original_description_ending",
+                "clean_description_ending",
+                "has_assumed_ending",
+            ],
+            "mainheatcont": [
+                "original_description",
+                "clean_description",
+                "original_description_ending",
+                "clean_description_ending",
+            ],
+            "windows": [
+                "original_description",
+                "clean_description",
+                "original_description_ending",
+                "clean_description_ending",
+                # We don't need many of the glazing coverage features because we have the multi_glaze_proportion feature
+                "has_glazing",
+                "glazing_coverage",
+                "no_data",
+                "has_glazing_ending",
+                "glazing_coverage_ending",
+                "no_data_ending",
+            ],
+            "main-fuel": [
+                "original_description",
+                "clean_description",
+                "original_description_ending",
+                "clean_description_ending",
+            ],
+        }
+
+        components_to_expand = cols_to_drop.keys()
+
+        for component in components_to_expand:
+            # TODO: change cleaned dataframe to have underscores instead of dashes
+            if component == "main-fuel":
+                # main-fuel is the only component whose lookup key and dataset columns do not follow
+                # the "<component>-description" / "<component>_description_*" naming
+                cleaned_key = "main-fuel"
+                left_on_starting = "main_fuel_starting"
+                left_on_ending = "main_fuel_ending"
+                original_cols = ["main_fuel_starting", "main_fuel_ending"]
+            else:
+                cleaned_key = f"{component}-description"
+                left_on_starting = f"{component}_description_starting"
+                left_on_ending = f"{component}_description_ending"
+                original_cols = [
+                    f"{component}_description_starting",
+                    f"{component}_description_ending",
+                ]
+
+            cleaned_lookup_df_for_key = pd.DataFrame(cleaned_lookup[cleaned_key])
+
+            # First join: features for the starting description (un-suffixed).
+            # Second join: features for the ending description, overlapping columns get "_ending"
+            expanded_df = self.df.merge(
+                cleaned_lookup_df_for_key,
+                how="left",
+                left_on=left_on_starting,
+                right_on="original_description",
+            ).merge(
+                cleaned_lookup_df_for_key,
+                how="left",
+                left_on=left_on_ending,
+                right_on="original_description",
+                suffixes=("", "_ending"),
+            )
+
+            # Drop properties where key material types have changed
+            expanded_df = self._drop_inconsistent_properties(expanded_df, component)
+
+            # Drop original cols and cols to drop
+            expanded_df = expanded_df.drop(
+                columns=cols_to_drop[component] + original_cols
+            )
+
+            # Rename columns to component specific names, if they have not been dropped
+            expanded_df = expanded_df.rename(
+                columns={
+                    "insulation_thickness": f"{component}_insulation_thickness",
+                    "insulation_thickness_ending": f"{component}_insulation_thickness_ending",
+                    "thermal_transmittance": f"{component}_thermal_transmittance",
+                    "thermal_transmittance_ending": f"{component}_thermal_transmittance_ending",
+                    "tariff_type": f"{component}_tariff_type",
+                    "tariff_type_ending": f"{component}_tariff_type_ending",
+                    "clean_description": f"{component}_clean_description",
+                    "clean_description_ending": f"{component}_clean_description_ending",
+                }
+            )
+            # The next component is merged onto the result of this one
+            self.df = expanded_df
+
+        # We don't need any lighting specific cleaning, we just drop the original description as we use
+        # LOW_ENERGY_LIGHTING_STARTING, LOW_ENERGY_LIGHTING_ENDING
+        self.df = self.df.drop(
+            columns=["lighting_description_starting", "lighting_description_ending"]
+        )
+
+    def _clean_missing_values(self, ignore_cols=None):
+        """
+        Fill the remaining nulls, column by column.
+
+        Columns that look boolean (they contain True or False, or are listed in BOOLEAN_VARIABLES) are
+        filled with False. Anything still null afterwards is filled with "none" when the column already
+        contains the value "none", and with "Unknown" otherwise.
+
+        NOTE(review): the True/False membership test compares with ==, so a numeric column containing
+        1 or 0 presumably takes the boolean branch as well - confirm this is intended.
+
+        :param ignore_cols: Optional collection of column names which should be left untouched
+        """
+        null_counts = self.df.isnull().sum()
+        columns_with_nulls = null_counts[null_counts > 0]
+
+        if ignore_cols:
+            columns_with_nulls = columns_with_nulls[
+                ~columns_with_nulls.index.isin(ignore_cols)
+            ]
+
+        for col in columns_with_nulls.index:
+            unique_values = self.df[col].unique()
+            filled = self.df[col]
+
+            looks_boolean = (
+                (True in unique_values)
+                or (False in unique_values)
+                or (col in BOOLEAN_VARIABLES)
+            )
+            if looks_boolean:
+                filled = filled.fillna(False)
+
+            # For a boolean column nothing is left to fill at this point, so this is a no-op for it
+            fallback = "none" if "none" in unique_values else "Unknown"
+            self.df[col] = filled.fillna(fallback)
+
+    def _null_validation(self, information: str):
+        """
+        Check that the dataset contains no null values.
+
+        :param information: Name of the step which has just run, used in the printed message and in the error
+        :raises ValueError: If any value in the dataframe is null
+        """
+        print(f"Null validation after {information}")
+        has_nulls = self.df.isnull().any().any()
+        if has_nulls:
+            raise ValueError(f"Null values found in dataset, after step {information}")
+
+    def _drop_features(self):
+        """
+        Drop the features that are not needed for modelling: the starting and ending lodgement dates
+        """
+        lodgement_date_cols = ["lodgement_date_starting", "lodgement_date_ending"]
+        self.df = self.df.drop(columns=lodgement_date_cols)
+
+    def _feature_generation(self):
+        """
+        Generate features for modelling: days_to_starting and days_to_ending, computed from the
+        corresponding lodgement date columns with _calculate_days_to
+        """
+        for stage in ("starting", "ending"):
+            lodgement_dates = self.df[f"lodgement_date_{stage}"]
+            self.df[f"days_to_{stage}"] = self._calculate_days_to(lodgement_dates)
+
+    def _clean_efficiency_variables(self):
+        """
+        Fill missing energy efficiency ratings with "NO_RATING".
+
+        There is scope to clean this per corresponding description. E.g. for WALLS_ENG_EFF we could
+        look at the mode efficiency rating by description and fill in the missing values with this.
+        When looking at this initially, there was a large volume of records with missing energy efficiency
+        values and therefore a simpler approach was taken just to test including these variables.
+
+        :raises ValueError: If a column whose name does not contain "energy_eff" has missing values
+        """
+        null_counts = self.df.isnull().sum()
+        columns_with_nulls = null_counts[null_counts >= 1].index
+
+        if len(columns_with_nulls) == 0:
+            return
+
+        # Make sure they are all efficiency columns
+        is_efficiency_col = columns_with_nulls.str.contains("energy_eff")
+        if not is_efficiency_col.all():
+            raise ValueError("Non efficiency columns are missing")
+
+        for column in columns_with_nulls:
+            self.df[column] = self.df[column].fillna("NO_RATING")
+
+    @staticmethod
+    def _calculate_days_to(lodgement_date):
+        """
+        Number of days between EARLIEST_EPC_DATE and the lodgement date.
+
+        :param lodgement_date: Either a single date string, or a column of dates
+        :return: A single number of days for a string input, otherwise the days read through the
+            .dt accessor of the result
+        """
+        elapsed = pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE)
+
+        if isinstance(lodgement_date, str):
+            # A single date gives a single Timedelta
+            return elapsed.days
+
+        return elapsed.dt.days
+
+    # def __add__(self, other) -> "TrainingDataset":
+    #     if not isinstance(other, TrainingDataset):
+    #         raise TypeError("Addition can only be performed with another instance of TrainingDataset")
+    #     return TrainingDataset(self.datasets + other.datasets)
+
+    # def __radd__(self, other):
+    #     """
+    #     Required for sum() to work
+    #     """
+    #     if isinstance(other, int):
+    #         return self
+    #     else:
+    #         return self.__add__(other)
+
+
+class NewDataset(BaseDataset):
+    """
+    A collection of EPCDifferenceRecords, which can be combined with other NewDataset instances
+    using + or sum().
+    """
+
+    def __init__(self, datasets: List[EPCDifferenceRecord]) -> None:
+        # The base initialiser creates pipeline_steps; without this call the attribute does not exist
+        # on NewDataset instances
+        super().__init__()
+        # self.pipeline_steps = self.pipeline_factory("newdata")
+        self.datasets = datasets
+
+    def __add__(self, other) -> "NewDataset":
+        """
+        Return a new NewDataset holding the records of both operands, left operand first.
+
+        :raises TypeError: If other is not a NewDataset
+        """
+        if not isinstance(other, NewDataset):
+            # The message names this class (it previously referred to a "ScoringDataset")
+            raise TypeError(
+                "Addition can only be performed with another instance of NewDataset"
+            )
+        return NewDataset(self.datasets + other.datasets)
+
+    def __radd__(self, other):
+        """
+        Required for sum() to work: sum() starts from the integer 0, so adding an integer on the left
+        returns this dataset unchanged. Anything else is handled by __add__.
+        """
+        if isinstance(other, int):
+            return self
+        return self.__add__(other)
--- a/etl/epc/Pipeline.py
+++ b/etl/epc/Pipeline.py
@ -0,0 +1,410 @@
+import msgpack
+import pandas as pd
+from datetime import datetime
+
+from typing import List
+from pathlib import Path
+from tqdm import tqdm
+import multiprocessing as mp
+
+from etl.epc.DataProcessor import EPCDataProcessor
+from etl.epc.Record import EPCRecord, EPCDifferenceRecord
+from etl.epc.Dataset import TrainingDataset
+from utils.s3 import save_dataframe_to_s3_parquet, read_from_s3
+from etl.epc.settings import (
+    MANDATORY_FIXED_FEATURES,
+    LATEST_FIELD,
+    COMPONENT_FEATURES,
+    RDSAP_RESPONSE,
+    HEAT_DEMAND_RESPONSE,
+    CARBON_RESPONSE,
+    CORE_COMPONENT_FEATURES,
+    EFFICIENCY_FEATURES,
+    POTENTIAL_COLUMNS,
+    ROOM_FEATURES,
+)
+
+# TODO: change in setting file
+def _lowercased(names):
+    """Return a new list holding the lower-cased form of every name."""
+    return [name.lower() for name in names]
+
+
+# The settings module's names are normalised to lower case here; ROOM_FEATURES
+# is used exactly as imported.
+MANDATORY_FIXED_FEATURES = _lowercased(MANDATORY_FIXED_FEATURES)
+# LATEST_FIELD = [x.lower() for x in LATEST_FIELD if x.lower() not in ROOM_FEATURES]
+LATEST_FIELD = _lowercased(LATEST_FIELD)
+COMPONENT_FEATURES = _lowercased(COMPONENT_FEATURES)
+CORE_COMPONENT_FEATURES = _lowercased(CORE_COMPONENT_FEATURES)
+EFFICIENCY_FEATURES = _lowercased(EFFICIENCY_FEATURES)
+POTENTIAL_COLUMNS = _lowercased(POTENTIAL_COLUMNS)
+
+RDSAP_RESPONSE = RDSAP_RESPONSE.lower()
+HEAT_DEMAND_RESPONSE = HEAT_DEMAND_RESPONSE.lower()
+CARBON_RESPONSE = CARBON_RESPONSE.lower()
+
+# Columns that may differ between two EPCs of the same property
+VARIABLE_DATA_FEATURES = (
+    COMPONENT_FEATURES
+    + ROOM_FEATURES
+    + EFFICIENCY_FEATURES
+    + POTENTIAL_COLUMNS
+    + ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE]
+)
+
+
+def get_cleaned_description_mapping(
+    s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name="retrofit-data-dev"
+):
+    """
+    Retrieve the cleaned dataset from s3 which has the cleaned descriptions for
+    the epc dataset.
+
+    This data is stored in MessagePack format and therefore needs to be decoded
+
+    :param s3_file_name: key of the MessagePack object inside the bucket
+    :param bucket_name: name of the S3 bucket holding the object
+    :return: the decoded MessagePack payload
+    """
+    packed = read_from_s3(s3_file_name=s3_file_name, bucket_name=bucket_name)
+
+    # raw=False makes msgpack decode raw strings to str instead of bytes
+    return msgpack.unpackb(packed, raw=False)
+
+
+# Loaded once at import time: importing this module performs the S3 read done
+# by get_cleaned_description_mapping().
+clean_lookup = get_cleaned_description_mapping()
+
+
+class EPCPipeline:
+    """
+    This class will take a list of directories and process them to create a dataset:
+    - Load the data
+    - Pre-process the data
+    - Create a dataset
+    - Clean the dataset
+    - Store the dataset
+    """
+
+    def __init__(
+        self,
+        epc_data_processor: EPCDataProcessor,
+        api_epc_records: dict = None,
+        directories: List[Path] | None = None,
+        run_mode="training",
+        epc_local_file="certificates.csv",
+        epc_bucket_name="retrofit-data-dev",
+        epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet",
+        epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet",
+        epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet",
+        use_parallel=False,
+    ):
+        """
+        :param epc_data_processor: EPCDataProcessor object
+        :param api_epc_records: EPC records used by the newdata pipeline
+        :param directories: List of directories to process
+        :param run_mode: Either training or newdata
+        :param epc_local_file: Local file name of the EPC data
+        :param epc_bucket_name: S3 bucket name
+        :param epc_cleaning_dataset_key: S3 key template for the cleaning dataset
+        :param epc_all_equal_rows_key: S3 key template for the all equal rows dataset
+        :param epc_compiled_dataset_key: S3 key template for the compiled dataset
+        :param use_parallel: process the directories with a multiprocessing pool
+        """
+        # Collaborators and run configuration
+        self.epc_data_processor = epc_data_processor
+        self.api_epc_records = api_epc_records
+        self.directories = directories
+        self.run_mode = run_mode
+        self.use_parallel = use_parallel
+        self.epc_local_file = epc_local_file
+        self.epc_bucket_name = epc_bucket_name
+
+        # The "{}" placeholder of every key template is filled with the time at
+        # which this pipeline object was created
+        run_stamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+        self.timeprefix = run_stamp
+        self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(run_stamp)
+        self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(run_stamp)
+        self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(run_stamp)
+
+        # Accumulators filled while directories are processed
+        self.compiled_dataset: pd.DataFrame = pd.DataFrame()
+        self.compiled_all_equal_rows: list = []
+        self.compiled_cleaning_averages: list = []
+
+    def run(self):
+        """
+        Entrypoint to run the pipeline
+
+        Dispatches on ``run_mode`` to the training or the newdata pipeline.
+        :raises ValueError: if ``run_mode`` is neither 'training' nor 'newdata'
+        """
+        if self.run_mode == "training":
+            self.run_training_dataset_pipeline()
+            return
+
+        if self.run_mode == "newdata":
+            self.run_newdata_dataset_pipeline()
+            return
+
+        raise ValueError("Run mode defined needs to be in 'training' or 'newdata'")
+
+    def run_newdata_dataset_pipeline(self):
+        """
+        Main function to run the newdata pipeline
+
+        Wraps the API EPC records in an EPCRecord, passes it through the data
+        processor and rebuilds one EPCRecord per row of the processed data.
+
+        NOTE(review): the rebuilt records are neither stored on the instance nor
+        returned, so this method has no visible output - looks unfinished,
+        confirm the intended behaviour.
+        """
+        prepared_epc = EPCRecord(
+            self.api_epc_records, run_mode="newdata"
+        )  # This uses all the epc records to clean the data
+
+        self.epc_data_processor.insert_data(prepared_epc)
+        self.epc_data_processor.prepare_data()
+
+        data = self.epc_data_processor.data
+
+        # One EPCRecord per row of the processed data (currently unused)
+        epc_records = [
+            EPCRecord(**x, run_mode="newdata") for x in data.to_dict(orient="records")
+        ]
+
+    def run_training_dataset_pipeline(self):
+        """
+        Main function to run the training dataset generation pipeline
+
+        Processes every directory (sequentially or in parallel, depending on
+        ``use_parallel``) and then writes three parquet files to S3: the compiled
+        dataset, the all-equal rows and the cleaning averages.
+
+        :raises ValueError: if no directories were supplied
+        """
+        if self.directories is None:
+            raise ValueError(
+                "Directories not specified - Unable to run Training pipeline"
+            )
+
+        if self.use_parallel:
+            self.run_training_dataset_parallel_pipeline()
+        else:
+            for directory in tqdm(self.directories):
+                self.process_directory(directory)
+
+        save_dataframe_to_s3_parquet(
+            df=self.compiled_dataset,
+            bucket_name=self.epc_bucket_name,
+            file_key=self.epc_compiled_dataset_key,
+        )
+
+        save_dataframe_to_s3_parquet(
+            df=pd.DataFrame(self.compiled_all_equal_rows),
+            bucket_name=self.epc_bucket_name,
+            file_key=self.epc_all_equal_rows_key,
+        )
+
+        # pd.concat raises ValueError on an empty list, which is what an empty
+        # ``directories`` list produces; store an empty frame in that case, just
+        # like the two (possibly empty) frames saved above.
+        if self.compiled_cleaning_averages:
+            cleaning_averages = pd.concat(self.compiled_cleaning_averages)
+        else:
+            cleaning_averages = pd.DataFrame()
+
+        save_dataframe_to_s3_parquet(
+            df=cleaning_averages,
+            bucket_name=self.epc_bucket_name,
+            file_key=self.epc_cleaning_dataset_key,
+        )
+
+    def run_training_dataset_parallel_pipeline(self):
+        """
+        Run the training pipeline in parallel
+
+        Every directory is mapped over a multiprocessing pool through
+        process_directory_task; the per-directory outputs are then merged into
+        this instance's accumulators.
+        """
+
+        with mp.Pool() as pool:
+            results = list(
+                tqdm(
+                    pool.imap(self.process_directory_task, self.directories),
+                    total=len(self.directories),
+                ),
+            )
+
+        datasets = [self.compiled_dataset]
+        for result in tqdm(results):
+            datasets.append(result["dataset"])
+            self.compiled_cleaning_averages.append(result["cleaning_averages"])
+            self.compiled_all_equal_rows.extend(result["all_equal_rows"])
+
+        # Concatenate once: growing the frame inside the loop re-copies all the
+        # previously merged rows on every iteration.
+        self.compiled_dataset = pd.concat(datasets)
+
+    def process_directory_task(self, directory: Path) -> dict:
+        """
+        Task to enable parallel processing
+
+        Processes one directory and packages this instance's resulting state so
+        it can be handed back to the caller.
+
+        :param directory: directory forwarded to process_directory
+        :return: dict with the keys "dataset", "cleaning_averages" and
+            "all_equal_rows"
+        """
+
+        self.process_directory(directory=directory)
+
+        return {
+            "dataset": self.compiled_dataset,
+            "cleaning_averages": self.epc_data_processor.cleaning_averages,
+            "all_equal_rows": self.compiled_all_equal_rows,
+        }
+
+    def process_directory(self, directory: Path):
+        """
+        Process a single directory
+
+        Prepares the directory's EPC file, records the processor's cleaning
+        averages and appends the difference records of every UPRN to the
+        compiled dataset.
+        :param directory: directory holding the local EPC file
+        :return: None
+        """
+        processor = self.epc_data_processor
+        processor.prepare_data(filepath=directory / self.epc_local_file)
+
+        constituency_data = processor.data
+        self.compiled_cleaning_averages.append(processor.cleaning_averages)
+
+        collected_records = []
+        for uprn, rows in constituency_data.groupby("uprn", observed=True):
+            records = self.process_uprn(
+                uprn=str(uprn), property_data=rows, directory=directory
+            )
+            # process_uprn returns None for properties that must be ignored
+            if records is None:
+                continue
+            collected_records.extend(records)
+
+        directory_dataset = TrainingDataset(
+            datasets=collected_records, cleaned_lookup=clean_lookup
+        )
+        self.compiled_dataset = pd.concat(
+            [self.compiled_dataset, directory_dataset.df]
+        )
+
+    def process_uprn(self, uprn: str, property_data: pd.DataFrame, directory: Path):
+        """
+        Process a single UPRN, which may have multiple different EPCs
+        :param uprn: UPRN
+        :param property_data: pd.DataFrame, Data for a single UPRN
+        :param directory: Path, Directory of the UPRN
+        :return: the property's difference records, or None when it is ignored
+        """
+        mandatory_values = property_data[MANDATORY_FIXED_FEATURES]
+
+        # The mandatory fixed features must hold exactly one, non-null, value per
+        # column; a property that e.g. changed building type is ignored
+        fixed_features_changed = any(mandatory_values.nunique() > 1)
+        if fixed_features_changed or pd.isnull(mandatory_values).sum().sum() > 0:
+            return None
+
+        uprn = str(uprn)
+
+        # Fixed features - these are property attributes that shouldn't change over time
+        # Take the last row for both the LATEST_FIELD and MANDATORY fields and combine all fields together
+        last_row = property_data[MANDATORY_FIXED_FEATURES + LATEST_FIELD].iloc[-1]
+        fixed_data = last_row.to_dict()
+
+        # We include the lodgement date here as we probably need to factor time into the
+        # model, since EPC standards and rigour have changed over time
+        epc_records = []
+        for row in property_data[VARIABLE_DATA_FEATURES].to_dict(orient="records"):
+            epc_records.append(EPCRecord(uprn, **row, run_mode="training"))
+
+        # TODO: We want to be able to provide value for the u values in the main pipeline so this will need to be part of the EPCRecord
+
+        # We can use multiple types of comparison datasets - i.e. Compare consecutive records, or compare all permutations of records
+        return self._generate_property_difference_records(
+            epc_records, uprn, directory, fixed_data
+        )
+
+    def _generate_property_difference_records(
+        self, epc_records: List[EPCRecord], uprn: str, directory: Path, fixed_data: dict
+    ):
+        """
+        Build the difference records of one property.
+
+        We can use multiple types of comparison datasets, for example:
+        - First vs second
+        - Second vs third
+        - First vs third
+        Currently every pair of records is compared; the consecutive-only
+        strategy is kept for reference below.
+        :param epc_records: EPC records of the property
+        :param uprn: UPRN of the property
+        :param directory: directory the property was read from
+        :param fixed_data: fixed attributes attached to every difference record
+        :return: whatever the comparison strategy returns for a fresh, empty list
+        """
+        # return self._compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, [])
+        return self._compare_all_permutation_epcs(
+            epc_records, uprn, directory, fixed_data, []
+        )
+
+    def _compare_all_permutation_epcs(
+        self,
+        epc_records: List[EPCRecord],
+        uprn: str,
+        directory: Path,
+        fixed_data: dict,
+        property_difference_records: list,
+    ):
+        """
+        Compare all permutations of EPCs for a given UPRN
+
+        Every record is paired with every record that follows it in
+        ``epc_records``.
+        :param epc_records: EPC records of the property, in list order
+        :param uprn: UPRN, stored with the all-equal rows
+        :param directory: directory of the UPRN; its name is stored with the
+            all-equal rows
+        :param fixed_data: fixed attributes handed to every difference record
+        :param property_difference_records: list the kept records are appended to
+        :return: ``property_difference_records``
+        """
+        # Loop invariant, so build the field list once instead of once per pair
+        core_fields = [x.lower() for x in CORE_COMPONENT_FEATURES]
+
+        for idx, earliest_record in enumerate(epc_records):
+            for latest_record in epc_records[idx + 1 :]:
+                difference_record: EPCDifferenceRecord = (
+                    latest_record.create_EPCDifferenceRecord(
+                        other=earliest_record, fixed_data=fixed_data
+                    )
+                )
+
+                # TODO: Pull out RDSAP_CHANGE to a variable
+                if (
+                    difference_record.get("rdsap_change") == 0
+                    and not difference_record.ensure_adequate_data()
+                ):
+                    # Rdsap hasn't changed and ensure_adequate_data() reports
+                    # that there is not enough data to use this record
+                    continue
+
+                if difference_record.compare_fields_in_records(fields=core_fields):
+                    # Keep track of this for the moment so we can analyse
+                    self.compiled_all_equal_rows.append(
+                        {"uprn": uprn, "directory_name": directory.name}
+                    )
+                    continue
+
+                property_difference_records.append(difference_record)
+
+        return property_difference_records
+
+    def _compare_consecutive_epcs(
+        self,
+        epc_records: List[EPCRecord],
+        uprn: str,
+        directory: Path,
+        fixed_data: dict,
+        property_difference_records: list,
+    ):
+        """
+        Compare consecutive EPCs for a given UPRN
+
+        Each record is paired only with the record directly after it in
+        ``epc_records``.
+        :param epc_records: EPC records of the property, in list order
+        :param uprn: UPRN, stored with the all-equal rows
+        :param directory: directory of the UPRN; its name is stored with the
+            all-equal rows
+        :param fixed_data: fixed attributes appended to every difference record
+        :param property_difference_records: list the kept records are appended to
+        :return: ``property_difference_records``
+        """
+        # Loop invariant, so build the field list once instead of once per pair
+        core_fields = [x.lower() for x in CORE_COMPONENT_FEATURES]
+
+        # zip stops at the shorter input, which yields exactly the neighbouring
+        # pairs and nothing at all for fewer than two records
+        for earliest_record, latest_record in zip(epc_records, epc_records[1:]):
+            difference_record: EPCDifferenceRecord = latest_record - earliest_record
+            # TODO: Use create_EPCDifferenceRecord instead of overloading operator
+            difference_record.append_fixed_data(fixed_data)
+
+            # TODO: Pull out RDSAP_CHANGE to a variable
+            if (
+                difference_record.get("rdsap_change") == 0
+                and not difference_record.ensure_adequate_data()
+            ):
+                # Rdsap hasn't changed and ensure_adequate_data() reports that
+                # there is not enough data to use this record
+                continue
+
+            if difference_record.compare_fields_in_records(fields=core_fields):
+                # Keep track of this for the moment so we can analyse
+                self.compiled_all_equal_rows.append(
+                    {"uprn": uprn, "directory_name": directory.name}
+                )
+                continue
+
+            property_difference_records.append(difference_record)
+
+        return property_difference_records
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
--- a/etl/epc/ValidationConfiguration.py
+++ b/etl/epc/ValidationConfiguration.py
@ -0,0 +1,61 @@
+"""
+Specify the validation rules for each field in the different record types.
+"""
+
+def validate_walls_description(value):
+    """
+    Check that a walls description is one of the recognised values.
+
+    :param value: walls description to check
+    :raises ValueError: if the value is not recognised
+    """
+    recognised = (
+        "Cavity",
+        "Solid",
+        "System built",
+        "Timber frame",
+        "Suspended timber",
+        "Other",
+    )
+    if value in recognised:
+        return
+    raise ValueError("Walls description is not valid")
+
+# Validation rules for the fields of an EPC record, keyed by field name.
+# A rule holds a "type" plus either the "acceptable_values" or a "range"
+# ([lower, upper]) and, optionally, a "function" to call for the field.
+# NOTE(review): how these rules are consumed is not visible here - confirm
+# against the validator before relying on the exact semantics.
+EPCRecordValidationConfiguration = {
+    "WALLS_DESCRIPTION": {
+        "type": "string",
+        "acceptable_values": ["Cavity", "Solid", "System built", "Timber frame", "Suspended timber", "Other"],
+        "function": validate_walls_description 
+    },
+    "FLOOR_DESCRIPTION": {
+        "type": "string",
+        "acceptable_values": ["Solid", "Suspended", "Other"]
+    },
+    "ENERGY_CONSUMPTION_CURRENT": {
+        "type": "float",
+        "range": [0, 100]
+    }
+}
+
+# No rules are defined yet for the difference record itself.
+EPCDifferenceRecordValidationConfiguration = {
+}
+
+# Validation rules for the fixed data attached to a difference record.
+EPCDifferenceRecordFixedDataValidationConfiguration = {
+    "PROPERTY_TYPE": {
+        "type": "string",
+        "acceptable_values": ["House", "Flat", "Bungalow", "Maisonette", "Park home", "Other"]
+    },
+    "BUILT_FORM": {
+        "type": "string",
+        "acceptable_values": ["Detached", "Semi-Detached", "End-Terrace", "Mid-Terrace", "Enclosed Mid-Terrace", "Enclosed End-Terrace", "Enclosed Detached", "Not applicable"]
+    },
+    # NOTE(review): the key is spelled "CONSITUENCY" (not "CONSTITUENCY") -
+    # confirm it matches the field name the validator looks up.
+    "CONSITUENCY": {
+        "type": "string",
+        "acceptable_values": ["England", "Wales", "Scotland", "Northern Ireland"]
+    },
+    "NUMBER_HABITABLE_ROOMS": {
+        "type": "integer",
+        "range": [0, 100]
+    },
+    "NUMBER_HEATED_ROOMS": {
+        "type": "integer",
+        "range": [0, 100]
+    },
+    "FIXED_LIGHTING_OUTLETS_COUNT": {
+        "type": "integer",
+        "range": [0, 100]
+    },
+    # NOTE(review): the list of acceptable values is empty - presumably still
+    # to be filled in; confirm.
+    "CONSTRUCTION_AGE_BAND": {
+        "type": "string",
+        "acceptable_values": []
+    }
+}
+
+# No dataset-level rules are defined yet.
+DatasetValidationConfiguration = {
+    
+}
--- a/etl/epc/generate_scenarios_data.py
+++ b/etl/epc/generate_scenarios_data.py
@ -0,0 +1,289 @@
+from datetime import datetime
+import itertools
+
+import pandas as pd
+from etl.epc.Record import EPCRecord
+from backend.SearchEpc import SearchEpc
+
+from sqlalchemy.orm import sessionmaker
+
+from backend.app.config import get_settings
+from backend.app.db.connection import db_engine
+from backend.app.db.functions.materials_functions import get_materials
+
+from backend.app.plan.utils import get_cleaned
+
+from backend.Property import Property
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
+from recommendations.Recommendations import Recommendations
+from utils.logger import setup_logger
+from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
+
+from datetime import datetime
+
+# Timestamp used in the S3 key of the scoring data saved at the end of the script
+now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
+
+logger = setup_logger()
+
+logger.info("Connecting to db")
+session = sessionmaker(bind=db_engine)()
+# ISO timestamp handed to ModelApi further down
+created_at = datetime.now().isoformat()
+
+# NOTE(review): the session is begun here but this script never commits,
+# rolls back or closes it - confirm that is intended.
+session.begin()
+logger.info("Getting the inputs")
+
+# Cleaning dataset passed to every EPCRecord built below
+cleaning_data = read_dataframe_from_s3_parquet(
+    bucket_name=get_settings().DATA_BUCKET,
+    file_key="sap_change_model/cleaning_dataset.parquet",
+)
+
+materials = get_materials(session)
+cleaned = get_cleaned()
+
+# Lookups needed by Property.get_spatial_data / Property.get_components
+uprn_filenames = read_dataframe_from_s3_parquet(
+    bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
+)
+photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(
+    bucket=get_settings().DATA_BUCKET
+)
+
+# Properties to generate scenario data for. Each entry identifies one EPC
+# (address, postcode and lmk-key) and lists the measures to score for it.
+# A "measures" entry is a 4-element list:
+#   [0] names of the measures that are combined,
+#   [1] the impact, as a string (cast to int once all rows are collected),
+#   [2] column overrides written onto the generated scoring record,
+#   [3] a list of indices - not read anywhere in this script.
+scenario_properties = [
+    {
+        "address": "2 South Terrace",
+        "postcode": "NN1 5JY",
+        "lmk-key": "1459796789102016070507274146560098",
+        "measures": [
+            [
+                ["internal_wall_insulation"],
+                "11",
+                {"walls_insulation_thickness_ending": "average"},
+                [0],
+            ],
+            [
+                ["external_wall_insulation"],
+                "10",
+                {"walls_insulation_thickness_ending": "average"},
+                [0],
+            ],
+            [["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]],
+        ],
+    },
+    {
+        "address": "8 Lindlings",
+        "postcode": "HP1 2HA",
+        "lmk-key": "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1",
+        "measures": [
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
+        ],
+    },
+    {
+        "address": "44 Lindlings",
+        "postcode": "HP1 2HE",
+        "lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117",
+        "measures": [
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
+        ],
+    },
+    {
+        "address": "46 Chaulden Terrace",
+        "postcode": "HP1 2AN",
+        "lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50",
+        "measures": [
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
+        ],
+    },
+    {
+        "address": "73 Long Chaulden",
+        "postcode": "HP1 2HX",
+        "lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a",
+        "measures": [
+            [
+                ["cavity_wall_insulation", "loft_insulation"],
+                "15",
+                {"walls_insulation_thickness_ending": "average"},
+                [0, 1],
+            ],
+        ],
+    },
+]
+
+
+recommendations_scoring_data = []
+
+# Wall measures, in the order in which their recommendations are collected
+WALL_MEASURE_TYPES = (
+    "internal_wall_insulation",
+    "external_wall_insulation",
+    "cavity_wall_insulation",
+)
+
+
+def _recommendations_of_type(recommendations, recommendation_type):
+    """Return the recommendations whose "type" equals recommendation_type."""
+    return [rec for rec in recommendations if rec["type"] == recommendation_type]
+
+
+for scenario_property in scenario_properties:
+    # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
+
+    epc_searcher = SearchEpc(
+        address1=scenario_property["address"],
+        postcode=scenario_property["postcode"],
+        auth_token=get_settings().EPC_AUTH_TOKEN,
+        os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
+    )
+    epc_searcher.find_property()
+
+    # Find the epc with the same LMK key
+    all_epcs = epc_searcher.older_epcs.copy()
+    all_epcs.extend([epc_searcher.newest_epc, epc_searcher.full_sap_epc])
+    original_epc = next(
+        (
+            epc
+            for epc in all_epcs
+            # A missing (None) EPC cannot match and must not break the search
+            if epc is not None
+            and epc.get("lmk-key", None) == scenario_property.get("lmk-key")
+        ),
+        None,
+    )
+    if original_epc is None:
+        # Fail with the offending property instead of a bare IndexError
+        raise ValueError(
+            f"No EPC with lmk-key {scenario_property.get('lmk-key')!r} found for "
+            f"{scenario_property['address']}, {scenario_property['postcode']}"
+        )
+
+    epc_records = {
+        "original_epc": original_epc,
+        "full_sap_epc": {},
+        "old_data": [],
+    }
+
+    prepared_epc = EPCRecord(
+        epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data
+    )
+
+    p = Property(
+        id=prepared_epc.uprn,
+        address=epc_searcher.address_clean,
+        postcode=epc_searcher.postcode_clean,
+        epc_record=prepared_epc,
+    )
+
+    p.get_spatial_data(uprn_filenames)
+    p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
+
+    recommender = Recommendations(property_instance=p, materials=materials)
+    property_recommendations = recommender.recommend("0")
+
+    wall_recommendations = recommender.wall_recomender.recommendations
+    loft_recommendations = recommender.roof_recommender.recommendations
+    solar_recommendations = recommender.solar_recommender.recommendation
+    windows_recommendations = recommender.windows_recommender.recommendation
+
+    p.create_base_difference_epc_record(cleaned_lookup=cleaned)
+
+    scoring_list = []
+
+    # Create the record for each of the different measures
+    for measure_impact_override in scenario_property["measures"]:
+
+        measure = measure_impact_override[0]
+        impact = measure_impact_override[1]
+        override = measure_impact_override[2]
+
+        # Collect, per component, the recommendations matching the requested
+        # measures; a component that was not requested contributes nothing.
+        wall_recs = []
+        for wall_type in WALL_MEASURE_TYPES:
+            if wall_type in measure:
+                wall_recs.extend(
+                    _recommendations_of_type(wall_recommendations, wall_type)
+                )
+
+        loft_recs = (
+            _recommendations_of_type(loft_recommendations, "loft_insulation")
+            if "loft_insulation" in measure
+            else []
+        )
+        solar_recs = (
+            _recommendations_of_type(solar_recommendations, "solar_pv")
+            if "solar" in measure
+            else []
+        )
+        windows_recs = (
+            _recommendations_of_type(windows_recommendations, "windows_glazing")
+            if "windows" in measure
+            else []
+        )
+
+        # Components without any recommendation are dropped so that they do not
+        # empty the cartesian product below
+        combi_list = [wall_recs, loft_recs, solar_recs, windows_recs]
+        combi_list = [element for element in combi_list if len(element) != 0]
+
+        all_combi_recommendations = list(itertools.product(*combi_list))
+
+        for i, combi in enumerate(all_combi_recommendations):
+            recommendation_record = p.base_difference_record.df.to_dict("records")[
+                0
+            ].copy()
+            recommendation_record = p.create_recommendation_scoring_data(
+                property_id=i,
+                primary_recommendation_id=i,
+                recommendation_record=recommendation_record,
+                recommendations=combi,
+            )
+
+            if override is not None:
+                for key, value in override.items():
+                    recommendation_record[key] = value
+
+            recommendation_record["id"] = "&".join(measure) + "+" + str(i)
+            recommendation_record["impact"] = impact
+            scoring_list.append(recommendation_record)
+
+    recommendations_scoring_data.extend(scoring_list)
+
+# Turn the collected records into the frame that is scored and stored
+recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
+recommendations_scoring_data["impact"] = recommendations_scoring_data["impact"].astype(
+    int
+)
+
+# Response / ending columns are removed before scoring
+columns_to_drop = [
+    "rdsap_change",
+    "heat_demand_change",
+    "carbon_change",
+    "sap_ending",
+    "heat_demand_ending",
+    "carbon_ending",
+]
+recommendations_scoring_data = recommendations_scoring_data.drop(
+    columns=columns_to_drop
+)
+
+# Move "impact" and then "id" to the front: the frame starts with id, impact
+for leading_column in ("impact", "id"):
+    recommendations_scoring_data.insert(
+        0, leading_column, recommendations_scoring_data.pop(leading_column)
+    )
+
+from backend.ml_models.api import ModelApi
+
+model_api = ModelApi(portfolio_id="generate-scenarios-data", timestamp=created_at)
+
+prediction_buckets = {
+    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
+    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
+    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
+}
+all_predictions = model_api.predict_all(
+    df=recommendations_scoring_data,
+    bucket=get_settings().DATA_BUCKET,
+    prediction_buckets=prediction_buckets,
+)
+
+# The scoring data itself is stored under a key carrying the script's timestamp
+save_dataframe_to_s3_parquet(
+    recommendations_scoring_data,
+    "retrofit-data-dev",
+    f"scenario_data/{now}/recommendations_scoring_data.parquet",
+)
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@ -1,636 +1,39 @@
 import pandas as pd
-import numpy as np
-from tqdm import tqdm
-import msgpack
-
 from pathlib import Path
-from etl.epc.settings import (
-    MANDATORY_FIXED_FEATURES,
-    LATEST_FIELD,
-    COMPONENT_FEATURES,
-    RDSAP_RESPONSE,
-    HEAT_DEMAND_RESPONSE,
-    COLUMNS_TO_MERGE_ON,
-    CARBON_RESPONSE,
-    CORE_COMPONENT_FEATURES,
-    EFFICIENCY_FEATURES,
-    POTENTIAL_COLUMNS,
-    MINIMUM_FLOOR_HEIGHT
-)
-from etl.epc.DataProcessor import DataProcessor
-from utils.s3 import save_dataframe_to_s3_parquet, read_from_s3
-from recommendations.rdsap_tables import england_wales_age_band_lookup
-from recommendations.recommendation_utils import (
-    get_wall_u_value, get_roof_u_value, get_floor_u_value, estimate_perimeter,
-    get_wall_type
-)
+from etl.epc.DataProcessor import EPCDataProcessor
+from etl.epc.Pipeline import EPCPipeline

 DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"


-def get_cleaned():
+def main():
    """
-    This function will retrieve the cleaned dataset from s3 which has the cleaned
-    descriptions for the epc dataset
-
-    This data is stored in MessagePack format and therefore needs to be decoded
-    :return:
+    Orchestration function
    """

-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
-    )
-
-    cleaned = msgpack.unpackb(cleaned, raw=False)
-
-    return cleaned
-
-
-def process_and_prune_desriptions(df, cleaned_lookup):
-    """
-    This method will merge on the cleaned lookup table and ensure that the building fabric in the
-    starting and ending EPC is consistent, so ensure that we are performing our modelling on the cleanest
-    possible dataset.
-    :param df:
-    :param cleaned_lookup:
-    :return:
-    """
-
-    cols_to_drop = {
-        "walls": [
-            # We need to cleaned descriptions for pulling out u-values
-            'original_description', 'thermal_transmittance_unit',
-            'original_description_ENDING',
-            'thermal_transmittance_unit_ENDING',
-            'is_cavity_wall_ENDING', 'is_filled_cavity_ENDING',
-            'is_solid_brick_ENDING', 'is_system_built_ENDING',
-            'is_timber_frame_ENDING', 'is_granite_or_whinstone_ENDING',
-            'is_as_built_ENDING', 'is_cob_ENDING', 'is_assumed_ENDING',
-            'is_sandstone_or_limestone_ENDING',
-            # Re remove the is_assumed columns
-            "is_assumed", "is_assumed_ENDING"
-        ],
-        "floor": [
-            "original_description", "clean_description", "thermal_transmittance_unit",
-            "no_data", "no_data_ENDING", "original_description_ENDING",
-            "clean_description_ENDING", "thermal_transmittance_unit_ENDING",
-            "is_suspended_ENDING", "is_solid_ENDING", "another_property_below_ENDING",
-            "is_to_unheated_space_ENDING", "is_to_external_air_ENDING", "is_assumed",
-            "is_assumed_ENDING"
-        ],
-        "roof": [
-            "original_description", "clean_description", "thermal_transmittance_unit",
-            "is_assumed", "is_valid", "original_description_ENDING", "clean_description_ENDING",
-            "thermal_transmittance_unit_ENDING", "is_pitched_ENDING", "is_roof_room_ENDING",
-            "is_loft_ENDING", "is_flat_ENDING", "is_thatched_ENDING", "is_at_rafters_ENDING",
-            "has_dwelling_above_ENDING", "is_assumed_ENDING", "is_valid_ENDING"
-        ],
-        "hotwater": [
-            "original_description", "clean_description", "assumed", "original_description_ENDING",
-            "clean_description_ENDING", "assumed_ENDING"
-        ],
-        "mainheat": [
-            "original_description", "clean_description", "original_description_ENDING",
-            "has_assumed", "original_description_ENDING", "clean_description_ENDING",
-            "has_assumed_ENDING",
-        ],
-        "mainheatcont": [
-            "original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING"
-        ],
-        "windows": [
-            "original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING",
-            # We don't need many of the glazing coverage features because we have the multi_glaze_proportion feature
-            "has_glazing", "glazing_coverage", "no_data", "has_glazing_ENDING", "glazing_coverage_ENDING",
-            "no_data_ENDING"
-        ],
-        "main-fuel": [
-            "original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING"
-        ],
-    }
-
-    for component in ["walls", "floor", "roof", "hotwater", "mainheat", "mainheatcont", "windows", "main-fuel"]:
-        component_upper = component.upper()
-        if component == "main-fuel":
-            component_upper = component_upper.replace("-", "_")
-
-        cleaned_key = "main-fuel" if component == "main-fuel" else f"{component}-description"
-        left_on_starting = (
-            f"{component_upper}_STARTING" if component == "main-fuel" else f"{component_upper}_DESCRIPTION_STARTING"
-        )
-
-        left_on_ending = (
-            f"{component_upper}_ENDING" if component == "main-fuel" else f"{component_upper}_DESCRIPTION_ENDING"
-        )
-
-        df = df.merge(
-            pd.DataFrame(cleaned_lookup[cleaned_key]),
-            how="left",
-            left_on=left_on_starting,
-            right_on="original_description",
-        ).merge(
-            pd.DataFrame(cleaned_lookup[cleaned_key]),
-            how="left",
-            left_on=left_on_ending,
-            right_on="original_description",
-            suffixes=("", "_ENDING")
-        )
-
-        if component == "walls":
-            # We make sure the wall construction hasn't changed
-            df = df[
-                (df["is_cavity_wall"] == df["is_cavity_wall_ENDING"]) &
-                (df["is_solid_brick"] == df["is_solid_brick_ENDING"]) &
-                (df["is_timber_frame"] == df["is_timber_frame_ENDING"]) &
-                (df["is_granite_or_whinstone"] == df["is_granite_or_whinstone_ENDING"]) &
-                (df["is_cob"] == df["is_cob_ENDING"]) &
-                (df["is_sandstone_or_limestone"] == df["is_sandstone_or_limestone_ENDING"])
-                ]
-        elif component == "floor":
-            df = df[
-                (df["is_suspended"] == df["is_suspended_ENDING"]) &
-                (df["is_solid"] == df["is_solid_ENDING"]) &
-                (df["another_property_below"] == df["another_property_below_ENDING"]) &
-                (df["is_to_unheated_space"] == df["is_to_unheated_space_ENDING"]) &
-                (df["is_to_external_air"] == df["is_to_external_air_ENDING"])
-                ]
-        elif component == "roof":
-            df = df[
-                (df["is_pitched"] == df["is_pitched_ENDING"]) &
-                (df["is_roof_room"] == df["is_roof_room_ENDING"]) &
-                (df["is_loft"] == df["is_loft_ENDING"]) &
-                (df["is_flat"] == df["is_flat_ENDING"]) &
-                (df["is_thatched"] == df["is_thatched_ENDING"]) &
-                (df["is_at_rafters"] == df["is_at_rafters_ENDING"]) &
-                (df["has_dwelling_above"] == df["has_dwelling_above_ENDING"])
-                ]
-
-        # Drop the binary indicators and replace the original description with the cleaned version
-
-        # Drop original cols
-        original_cols = [
-            f"{component_upper}_DESCRIPTION_STARTING", f"{component_upper}_DESCRIPTION_ENDING"
-        ] if component != "main-fuel" else [
-            f"{component_upper}_STARTING", f"{component_upper}_ENDING"
-        ]
-
-        df = df.drop(columns=cols_to_drop[component] + original_cols)
-
-        # If we have an insulation_thickness column, rename it
-        if "insulation_thickness" in cleaned_lookup[cleaned_key][0]:
-            df = df.rename(
-                columns={
-                    "insulation_thickness": f"{component}_insulation_thickness",
-                    "insulation_thickness_ENDING": f"{component}_insulation_thickness_ENDING",
-                }
-            )
-        # If we have thermal transmittance, rename it
-        if "thermal_transmittance" in cleaned_lookup[cleaned_key][0]:
-            df = df.rename(
-                columns={
-                    "thermal_transmittance": f"{component}_thermal_transmittance",
-                    "thermal_transmittance_ENDING": f"{component}_thermal_transmittance_ENDING",
-                }
-            )
-
-        # If we have tarrif, rename it
-        if "tariff_type" in cleaned_lookup[cleaned_key][0]:
-            df = df.rename(
-                columns={
-                    "tariff_type": f"{component}_tariff_type",
-                    "tariff_type_ENDING": f"{component}_tariff_type_ENDING",
-                }
-            )
-
-        # We need the walls descriptions so we rename them to distinguish them
-        if component == "walls":
-            df = df.rename(
-                columns={
-                    "clean_description": f"{component}_clean_description",
-                    "clean_description_ENDING": f"{component}_clean_description_ENDING",
-                }
-            )
-
-    # We don't need any lighting specific cleaning, we just drop the original description as we use
-    # LOW_ENERGY_LIGHTING_STARTING, LOW_ENERGY_LIGHTING_ENDING
-
-    df = df.drop(columns=["LIGHTING_DESCRIPTION_STARTING", "LIGHTING_DESCRIPTION_ENDING"])
-
-    return df
-
-
-def make_uvalues(df):
-    df["row_index"] = df.index
-
-    uvalues = []
-    for _, x in df.iterrows():
-
-        uprn = x["UPRN"]
-        row_index = x["row_index"]
-        age_band = england_wales_age_band_lookup[x["CONSTRUCTION_AGE_BAND"]]
-
-        # ~~~~~~~~~~~~~~~~~~
-        # Walls
-        # ~~~~~~~~~~~~~~~~~~
-
-        starting_wall_uvalue = x["walls_thermal_transmittance"]
-        if pd.isnull(starting_wall_uvalue):
-            starting_wall_uvalue = get_wall_u_value(
-                clean_description=x["walls_clean_description"],
-                age_band=age_band,
-                is_granite_or_whinstone=x["is_granite_or_whinstone"],
-                is_sandstone_or_limestone=x["is_sandstone_or_limestone"],
-            )
-
-        ending_wall_uvalue = x["walls_thermal_transmittance_ENDING"]
-        if pd.isnull(ending_wall_uvalue):
-            if x["walls_clean_description"] != x["walls_clean_description_ENDING"]:
-                ending_wall_uvalue = get_wall_u_value(
-                    clean_description=x["walls_clean_description_ENDING"],
-                    age_band=age_band,
-                    is_granite_or_whinstone=x["is_granite_or_whinstone"],
-                    is_sandstone_or_limestone=x["is_sandstone_or_limestone"],
-                )
-            else:
-                ending_wall_uvalue = starting_wall_uvalue
-
-        # ~~~~~~~~~~~~~~~~~~
-        # Roof
-        # ~~~~~~~~~~~~~~~~~~
-
-        if x["has_dwelling_above"]:
-            if x["roof_thermal_transmittance"] != 0:
-                raise ValueError("Should have 0 u-value for roof")
-
-            if x["roof_thermal_transmittance_ENDING"] != 0:
-                raise ValueError("Should have 0 u-value for roof")
-
-        starting_roof_uvalue = x["roof_thermal_transmittance"]
-        if pd.isnull(starting_roof_uvalue):
-            starting_roof_uvalue = get_roof_u_value(
-                insulation_thickness=x["roof_insulation_thickness"],
-                has_dwelling_above=x["has_dwelling_above"],
-                is_loft=x["is_loft"],
-                is_roof_room=x["is_roof_room"],
-                is_thatched=x["is_thatched"],
-                is_flat=x["is_flat"],
-                is_pitched=x["is_pitched"],
-                is_at_rafters=x["is_at_rafters"],
-                age_band=age_band
-            )
-
-        ending_roof_uvalue = x["roof_thermal_transmittance_ENDING"]
-
-        if pd.isnull(ending_roof_uvalue):
-            ending_roof_uvalue = get_roof_u_value(
-                insulation_thickness=x["roof_insulation_thickness_ENDING"],
-                has_dwelling_above=x["has_dwelling_above"],
-                is_loft=x["is_loft"],
-                is_roof_room=x["is_roof_room"],
-                is_thatched=x["is_thatched"],
-                is_flat=x["is_flat"],
-                is_pitched=x["is_pitched"],
-                is_at_rafters=x["is_at_rafters"],
-                age_band=age_band
-            )
-
-        # ~~~~~~~~~~~~~~~~~~
-        # Floor
-        # ~~~~~~~~~~~~~~~~~~
-        perimeters = {}
-        for suffix in ["_STARTING", "_ENDING"]:
-            floor_area = x[f"TOTAL_FLOOR_AREA{suffix}"]
-            n_rooms = x["NUMBER_HABITABLE_ROOMS"]
-
-            perimeters[f"estimated_perimeter{suffix}"] = estimate_perimeter(floor_area, n_rooms)
-
-        floor_type = "suspended" if x["is_suspended"] else "solid"
-        wall_type = get_wall_type(**x)
-
-        if x["another_property_below"]:
-            if x["floor_thermal_transmittance"] != 0:
-                raise ValueError("Should have 0 u-value for floor")
-
-            if x["floor_thermal_transmittance_ENDING"] != 0:
-                raise ValueError("Should have 0 u-value for floor")
-            starting_floor_uvalue, ending_floor_uvalue = 0, 0
-        else:
-            starting_floor_uvalue = x["floor_thermal_transmittance"]
-            ending_floor_uvalue = x["floor_thermal_transmittance_ENDING"]
-
-        if pd.isnull(starting_floor_uvalue):
-            starting_floor_uvalue = get_floor_u_value(
-                floor_type=floor_type,
-                perimeter=perimeters["estimated_perimeter_STARTING"],
-                area=x[f"TOTAL_FLOOR_AREA_STARTING"],
-                insulation_thickness=x["floor_insulation_thickness"],
-                wall_type=wall_type,
-                age_band=age_band
-            )
-
-        if pd.isnull(ending_floor_uvalue):
-            ending_floor_uvalue = get_floor_u_value(
-                floor_type=floor_type,
-                perimeter=perimeters["estimated_perimeter_ENDING"],
-                area=x[f"TOTAL_FLOOR_AREA_ENDING"],
-                insulation_thickness=x["floor_insulation_thickness_ENDING"],
-                wall_type=wall_type,
-                age_band=age_band
-            )
-
-        uvalues.append(
-            {
-                "UPRN": uprn,
-                "row_index": row_index,
-                "starting_walls_uvalue": starting_wall_uvalue,
-                "ending_walls_uvalue": ending_wall_uvalue,
-                "starting_roof_uvalue": starting_roof_uvalue,
-                "ending_roof_uvalue": ending_roof_uvalue,
-                "starting_floor_uvalue": starting_floor_uvalue,
-                "ending_floor_uvalue": ending_floor_uvalue,
-                **perimeters
-            }
-        )
-
-    uvalues = pd.DataFrame(uvalues)
-
-    df = df.merge(
-        uvalues, how="left", on=["UPRN", "row_index"]
-    ).drop(columns="row_index")
-
-    # Fill missings
-    for component in ["walls", "floor", "roof"]:
-        for suffix in ["", "_ENDING"]:
-            fill_col = f"starting_{component}_uvalue" if suffix == "" else f"ending_{component}_uvalue"
-
-            df[f"{component}_thermal_transmittance{suffix}"] = np.where(
-                pd.isnull(df[f"{component}_thermal_transmittance{suffix}"]),
-                df[fill_col],
-                df[f"{component}_thermal_transmittance{suffix}"]
-            )
-
-    df = df.drop(
-        columns=[
-            "starting_walls_uvalue", "ending_walls_uvalue", "starting_roof_uvalue",
-            "ending_roof_uvalue", "starting_floor_uvalue", "ending_floor_uvalue"
-        ]
-    )
-
-    return df
-
-
-def compare_records(earliest_record: pd.Series, latest_record: pd.Series, columns: list):
-    """
-    For a list of columns, check if the earliest and latest record are the same
-    If they are the same, we indicate this, because we have example of SAP scores changing
-    without any feature changes
-    :param earliest_record: pd.Series
-    :param latest_record: pd.Series
-    :param columns: list of columns to compare
-    :return: boolean indicating whether or not all features are the same
-    """
-
-    all_equal = True
-    for col in columns:
-        if earliest_record[col] != latest_record[col]:
-            return False
-    if all_equal:
-        return True
-
-
-def app():
-    # Get all the files in the directory
-
-    # Data glossary:
-    # https://epc.opendatacommunities.org/docs/guidance#glossary
-
-    cleaned_lookup = get_cleaned()
-
-    # List all subdirectories
    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
+    # directories = directories[0:3]

-    dataset = []
-    cleaning_dataset = []
-    # Keep track of the all equals
-    all_equal_rows = []
-
-    for directory in tqdm(directories):
-        filepath = directory / "certificates.csv"
-
-        data_processor = DataProcessor(filepath=filepath)
-
-        df = data_processor.pre_process()
-
-        cleaning_averages = data_processor.make_cleaning_averages()
-
-        # We have some odd cases with missing constituency so we fill
-        df = df.fillna({"CONSTITUENCY": df["CONSTITUENCY"].mode().values[0]})
-
-        df = DataProcessor.apply_averages_cleaning(
-            data_to_clean=df,
-            cleaning_data=cleaning_averages,
-            cols_to_merge_on=COLUMNS_TO_MERGE_ON
-        )
-
-        data_by_urpn = []
-        for uprn, property_data in df.groupby("UPRN", observed=True):
-
-            # Fixed features - these are property attributes that shouldn't change over time
-            fixed_data = {}
-
-            # If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
-            if any(property_data[MANDATORY_FIXED_FEATURES].nunique() > 1) or (
-                pd.isnull(property_data[MANDATORY_FIXED_FEATURES]).sum().sum() > 0
-            ):
-                continue
-
-            # Take the latest row for both the LATEST_FEILDS and MANDATORY FIELDS
-            latest_field_data = property_data[LATEST_FIELD].iloc[-1].to_dict()
-            mandatory_field_data = (
-                property_data[MANDATORY_FIXED_FEATURES].iloc[-1].to_dict()
-            )
-
-            # Combine all fields together
-            fixed_data.update(mandatory_field_data)
-            fixed_data.update(latest_field_data)
-
-            # We include the lodgement date here as we probably need to factor time into the
-            # model, since EPC standards and rigour have changed over time
-            variable_data = property_data[
-                COMPONENT_FEATURES + EFFICIENCY_FEATURES + POTENTIAL_COLUMNS + [
-                    "LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE
-                ]
-                ]
-
-            # Note: we look at changes between subsequent EPCS, however we could look at other permutations
-            # e.g. first vs second, second vs third and also first vs third
-            property_model_data = []
-            for idx in range(0, property_data.shape[0] - 1):
-
-                if idx >= property_data.shape[0] - 1:
-                    break
-
-                earliest_record = variable_data.iloc[idx]
-                latest_record = variable_data.iloc[idx + 1]
-
-                # Check if the sap gets better or worse
-                gets_better = earliest_record[RDSAP_RESPONSE] <= latest_record[RDSAP_RESPONSE]
-
-                component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES
-
-                if gets_better:
-                    starting_sap = earliest_record[RDSAP_RESPONSE]
-                    starting_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
-                    starting_carbon = earliest_record[CARBON_RESPONSE]
-
-                    ending_sap = latest_record[RDSAP_RESPONSE]
-                    ending_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
-                    ending_carbon = latest_record[CARBON_RESPONSE]
-
-                    rdsap_change = latest_record[RDSAP_RESPONSE] - starting_sap
-                    heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
-                    carbon_change = latest_record[CARBON_RESPONSE] - starting_carbon
-
-                    starting_record = earliest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
-                    ending_record = latest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
-                else:
-                    starting_sap = latest_record[RDSAP_RESPONSE]
-                    starting_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
-                    starting_carbon = latest_record[CARBON_RESPONSE]
-
-                    ending_sap = earliest_record[RDSAP_RESPONSE]
-                    ending_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
-                    ending_carbon = earliest_record[CARBON_RESPONSE]
-
-                    rdsap_change = earliest_record[RDSAP_RESPONSE] - starting_sap
-                    heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
-                    carbon_change = earliest_record[CARBON_RESPONSE] - starting_carbon
-
-                    starting_record = latest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
-                    ending_record = earliest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
-
-                if rdsap_change == 0:
-                    continue
-
-                all_equal = compare_records(
-                    earliest_record=earliest_record,
-                    latest_record=latest_record,
-                    columns=CORE_COMPONENT_FEATURES
-                )
-
-                if all_equal:
-                    # Keep track of this for the moment so we can analyse
-                    all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
-                    continue
-
-                features = pd.concat([starting_record, ending_record])
-
-                property_model_data.append(
-                    {
-                        "UPRN": uprn,
-                        "RDSAP_CHANGE": rdsap_change,
-                        "HEAT_DEMAND_CHANGE": heat_demand_change,
-                        "CARBON_CHANGE": carbon_change,
-                        "SAP_STARTING": starting_sap,
-                        "SAP_ENDING": ending_sap,
-                        "HEAT_DEMAND_STARTING": starting_heat_demand,
-                        "HEAT_DEMAND_ENDING": ending_heat_demand,
-                        "CARBON_STARTING": starting_carbon,
-                        "CARBON_ENDING": ending_carbon,
-                        "POTENTIAL_ENERGY_EFFICIENCY": earliest_record["POTENTIAL_ENERGY_EFFICIENCY"],
-                        "ENVIRONMENT_IMPACT_POTENTIAL": earliest_record["ENVIRONMENT_IMPACT_POTENTIAL"],
-                        "ENERGY_CONSUMPTION_POTENTIAL": earliest_record["ENERGY_CONSUMPTION_POTENTIAL"],
-                        "CO2_EMISSIONS_POTENTIAL": earliest_record["CO2_EMISSIONS_POTENTIAL"],
-                        **fixed_data,
-                        **features.to_dict(),
-                    }
-                )
-
-            data_by_urpn.extend(property_model_data)
-
-        data_by_urpn_df = pd.DataFrame(data_by_urpn)
-
-        data_by_urpn_df["DAYS_TO_STARTING"] = DataProcessor.calculate_days_to(
-            data_by_urpn_df["LODGEMENT_DATE_STARTING"]
-        )
-
-        data_by_urpn_df["DAYS_TO_ENDING"] = DataProcessor.calculate_days_to(
-            data_by_urpn_df["LODGEMENT_DATE_ENDING"]
-        )
-
-        data_by_urpn_df = data_by_urpn_df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"])
-
-        data_by_urpn_df = DataProcessor.clean_efficiency_variables(data_by_urpn_df)
-
-        # We look for key building fabric features that have changed from one EPC to the next.
-        # if, for example, we see that a home has gone from being a cavity wall to a solid wall, we
-        # remove this record, as it indicates that the quality of the EPC conducted in the first instance
-        # is low
-        # We also replace descriptions with their cleaned variants
-
-        if pd.isnull(data_by_urpn_df).sum().sum():
-            raise ValueError("Null values found in dataset")
-
-        data_by_urpn_df = process_and_prune_desriptions(data_by_urpn_df, cleaned_lookup)
-
-        # Apply u-values
-        for col in ["walls_clean_description", "walls_clean_description_ENDING"]:
-            data_by_urpn_df[col] = data_by_urpn_df[col].str.replace("(assumed)", "").str.rstrip()
-
-        data_by_urpn_df = make_uvalues(data_by_urpn_df).drop(
-            columns=["walls_clean_description", "walls_clean_description_ENDING"]
-        )
-
-        # TODO: For some of the features that we clean, we have either a true, false or possibly null value
-        #       Those nulls should be False. clean_missings_after_description_process handles this but shouldn't
-        #       need to
-
-        data_by_urpn_df = DataProcessor.clean_missings_after_description_process(data_by_urpn_df)
-
-        if pd.isnull(data_by_urpn_df).sum().sum():
-            raise ValueError("Null values found in dataset after process_and_prune_desriptions")
-
-        dataset.append(data_by_urpn_df)
-
-        cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0]
-        cleaning_dataset.append(cleaning_averages)
-
-    print("Final all equal count: %s" % str(len(all_equal_rows)))
-
-    # Store cleaning dataset in s3 as a parquet file
-    cleaning_dataset = pd.concat(cleaning_dataset)
-    save_dataframe_to_s3_parquet(
-        df=cleaning_dataset,
-        bucket_name="retrofit-data-dev",
-        file_key="sap_change_model/cleaning_dataset.parquet",
+    epc_pipeline = EPCPipeline(
+        directories=directories,
+        use_parallel=True,
+        epc_data_processor=EPCDataProcessor(run_mode="training"),
    )

-    output = pd.concat(dataset)
+    epc_pipeline.run()

-    # Remove any records that have huge swings in their floor area
-    output["tfa_diff_abs"] = abs(output["TOTAL_FLOOR_AREA_ENDING"] - output["TOTAL_FLOOR_AREA_STARTING"])
-    output["tfa_diff_prop"] = output["tfa_diff_abs"] / output["TOTAL_FLOOR_AREA_STARTING"]
-    output = output[output["tfa_diff_prop"] < 0.5]
-    output = output.drop(columns=["tfa_diff_abs", "tfa_diff_prop"])
+    # For testing
+    # dataset_df = epc_pipeline.compiled_dataset
+    # dataset_df.to_parquet("refactor_datasets/dataset_with0perm_all.parquet")
+    # pd.DataFrame(epc_pipeline.compiled_all_equal_rows).to_parquet("refactor_datasets/all_equal_rows_with0perm_all.parquet")
+    # pd.concat(epc_pipeline.compiled_cleaning_averages).to_parquet("refactor_datasets/cleaning_averages_with0perm_all.parquet")

-    uvalue_columns = [col for col in output.columns if "thermal_transmittance" in col]
-    for uvalue_col in uvalue_columns:
-        output[uvalue_col] = pd.to_numeric(output[uvalue_col])
-
-    save_dataframe_to_s3_parquet(
-        df=output,
-        bucket_name="retrofit-data-dev",
-        file_key="sap_change_model/dataset.parquet",
-    )
-
-    # Store all_equal_rows
-    all_equal_rows = pd.DataFrame(all_equal_rows)
-    save_dataframe_to_s3_parquet(
-        df=all_equal_rows,
-        bucket_name="retrofit-data-dev",
-        file_key="sap_change_model/all_equal_rows.parquet",
-    )
+    # from utils.s3 import read_dataframe_from_s3_parquet
+    # dataset = read_dataframe_from_s3_parquet(
+    #     bucket_name="retrofit-data-dev",
+    #     file_key="sap_change_model/dataset_test.parquet",
+    # )


 if __name__ == "__main__":
-    app()
+    main()
--- a/etl/epc/requirements.txt
+++ b/etl/epc/requirements.txt
@ -0,0 +1,5 @@
+pandas==2.1.3
+tqdm==4.66.1
+msgpack==1.0.7
+boto3==1.29.6
+pyarrow==15.0.2
--- a/etl/epc/settings.py
+++ b/etl/epc/settings.py
@ -2,6 +2,63 @@
 # TODO: migrate to dynaconf
 from pathlib import Path

+DATA_ANOMALY_MATCHES = {
+    # Invalid reports are where the value provided is out of bounds, e.g. a negative energy rating of -1199 or a
+    # non-integer, there is no valid energy band for this, so it is marked as INVALID!
+    "INVALID",
+    "INVALID!",
+    # When the energy certificate was first lodged on the register there was no requirement to lodge this data
+    # item, i.e. a non-mandatory item.
+    "NO DATA!",
+    "NODATA!",
+    # When the energy certificate was first lodged on the register there was no requirement to lodge this data item,
+    # i.e. a non-mandatory item.
+    "N/A",
+    # A value generated by the register to account for a data item that was not mandatory when the lodgement of
+    # the energy certificate occurred. When the data item became mandatory the register operator, for backwards
+    # compatibility purposes, populated the data field with a value of ‘not recorded’ to ensure that the energy
+    # certificate retrieval process is successfully completed. Mandatory data items cannot be applied
+    # retrospectively to energy certificates lodged before the date of the change.
+    "Not recorded",
+    # The data also contains DECs with an operational rating of ‘9999’ (a ‘default’ DEC). The production of a
+    # ‘default’ DEC value was allowed to enable building occupiers, with poor quality or no energy data,
+    # the opportunity to comply with the regulations. From April 2011 the ability to lodge a ‘default’ DEC was no
+    # longer allowed.
+    "9999",
+    # The Building Emission Rate (BER) data field for non-domestic buildings may contain a ‘blank’ value. The BER
+    # was only lodged on the register from 7 March 2010.
+    "Blank",
+    # There are currently just over 8,600 records where the local authority identifier is ‘null’. This is due to
+    # the Register Operator not being able to match the building address in the Markermap Ordnance Survey (GB)
+    # lookup tables or OS MasterMap Address Layer 2 data. The majority of these addresses have been requested
+    # manually by energy assessors for inclusion by the Register Operator in the registers (e.g. new builds,
+    # etc). These records are being published for completeness. An ongoing process to manage these manually added
+    # addresses will take time to develop to deal with these and future anomalies.
+    #
+    # There are several fields within the lodged data where it is possible to enter multiple entries to cater for
+    # different data_types of build within a single property, i.e. extensions. This results in multiple entries for
+    # the description fields for floor, roof and wall. For the purposes of this data release only the information
+    # contained within the first of these multiple entries is being provided. As there are no restrictions on the
+    # value in this first field it means that sometimes the first field in a multiple entry description field may
+    # contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases.
+    "NULL",
+    # We sometimes see fields populated with just an empty string.
+    "",
+    # We sometimes find None values - particularly when we produce an estimated EPC
+    None,
+    # An older value which rarely shows up but has been seen in the data.
+    "UNKNOWN",
+}
+
+DATA_ANOMALY_SUBSTRINGS = {
+    # Where values in a ‘pick’ list that have been superseded by another value. For example, where a value for
+    # ‘pitched roof’ has been replaced by three sub-categories of pitched roof. The original value is retained
+    # but ‘for backward compatibility only’ it is appended to ensure that the energy certificate retrieval
+    # process can be successfully completed. Replacement data items cannot be applied retrospectively to energy
+    # certificates lodged on the register before the date of the change.
+    "for backward compatibility only"
+}
+
 METRIC_FILENAME = "metrics.csv"

 OPTIMISE_METRIC = "mean_absolute_error"
@ -106,17 +163,20 @@ CORE_COMPONENT_FEATURES = [
 ]

 EFFICIENCY_FEATURES = [
-    'HOT_WATER_ENERGY_EFF',
-    'FLOOR_ENERGY_EFF',
-    'WINDOWS_ENERGY_EFF',
-    'WALLS_ENERGY_EFF',
-    'SHEATING_ENERGY_EFF',
-    'ROOF_ENERGY_EFF',
-    'MAINHEAT_ENERGY_EFF',
-    'MAINHEATC_ENERGY_EFF',
-    'LIGHTING_ENERGY_EFF'
+    "HOT_WATER_ENERGY_EFF",
+    "FLOOR_ENERGY_EFF",
+    "WINDOWS_ENERGY_EFF",
+    "WALLS_ENERGY_EFF",
+    "SHEATING_ENERGY_EFF",
+    "ROOF_ENERGY_EFF",
+    "MAINHEAT_ENERGY_EFF",
+    "MAINHEATC_ENERGY_EFF",
+    "LIGHTING_ENERGY_EFF",
 ]

+ROOM_FEATURES = ["number_habitable_rooms", "number_heated_rooms"]
+
+
 COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
    "TRANSACTION_TYPE",
    "ENERGY_TARIFF",  # Not sure if this is relevant
@ -127,10 +187,10 @@ COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
 ]

 POTENTIAL_COLUMNS = [
-    'POTENTIAL_ENERGY_EFFICIENCY',
-    'ENVIRONMENT_IMPACT_POTENTIAL',
-    'ENERGY_CONSUMPTION_POTENTIAL',
-    'CO2_EMISSIONS_POTENTIAL',
+    "POTENTIAL_ENERGY_EFFICIENCY",
+    "ENVIRONMENT_IMPACT_POTENTIAL",
+    "ENERGY_CONSUMPTION_POTENTIAL",
+    "CO2_EMISSIONS_POTENTIAL",
    # We don't include cost features for the moment
    # 'LIGHTING_COST_POTENTIAL',
    # 'HEATING_COST_POTENTIAL',
@ -155,6 +215,14 @@ MANDATORY_FIXED_FEATURES = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTITUENCY"]
 # and Wales from 31 July 2014
 EARLIEST_EPC_DATE = "2014-08-01"

+IGNORED_TRANSACTION_TYPES = "new dwelling"  # NOTE(review): plain str while sibling IGNORED_* values are lists - confirm callers compare with ==, not `in`/isin
+IGNORED_FLOOR_LEVELS = ["top floor", "mid floor"]
+IGNORED_PROPERTY_TYPES = "Park home"  # NOTE(review): plain str as well - same check applies
+IGNORED_TENURES = [  # NOTE(review): "in not known" presumably mirrors the source data verbatim - verify before correcting
+    "Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is not to be used "
+    "for an existing dwelling"
+]
+
 RDSAP_RESPONSE = "CURRENT_ENERGY_EFFICIENCY"
 HEAT_DEMAND_RESPONSE = "ENERGY_CONSUMPTION_CURRENT"
 CARBON_RESPONSE = "CO2_EMISSIONS_CURRENT"
@ -172,30 +240,55 @@ DATA_PROCESSOR_SETTINGS = {

 # This has a manual mapping of the column types required
 COLUMNTYPES = {
-    'UPRN': 'object', 'TOTAL_FLOOR_AREA': 'float64', 'FLOOR_HEIGHT': 'float64', 'PROPERTY_TYPE': 'object',
-    'BUILT_FORM': 'object', 'CONSTITUENCY': 'object', 'NUMBER_HABITABLE_ROOMS': 'float64',
-    'NUMBER_HEATED_ROOMS': 'float64', 'FIXED_LIGHTING_OUTLETS_COUNT': 'float64',
-    'CONSTRUCTION_AGE_BAND': 'object',
-    'TRANSACTION_TYPE': 'object',
-    'WALLS_DESCRIPTION': 'object',
-    'FLOOR_DESCRIPTION': 'object',
-    'LIGHTING_DESCRIPTION': 'object',
-    'ROOF_DESCRIPTION': 'object',
-    'MAINHEAT_DESCRIPTION': 'object',
-    'HOTWATER_DESCRIPTION': 'object', 'MAIN_FUEL': 'object',
-    'MECHANICAL_VENTILATION': 'object',
-    'SECONDHEAT_DESCRIPTION': 'object', 'ENERGY_TARIFF': 'object',
-    'SOLAR_WATER_HEATING_FLAG': 'object', 'PHOTO_SUPPLY': 'float64',
-    'WINDOWS_DESCRIPTION': 'object',
-    'GLAZED_TYPE': 'object',
-    'MULTI_GLAZE_PROPORTION': 'float64',
-    'LOW_ENERGY_LIGHTING': 'float64',
-    'NUMBER_OPEN_FIREPLACES': 'float64',
-    'MAINHEATCONT_DESCRIPTION': 'object',
-    'EXTENSION_COUNT': 'float64',
-    'LODGEMENT_DATE': 'object',
-    **dict(zip(EFFICIENCY_FEATURES, ['object', ] * len(EFFICIENCY_FEATURES))),
-    **dict(zip(POTENTIAL_COLUMNS, ['float64', ] * len(POTENTIAL_COLUMNS)))
+    "UPRN": "object",
+    "TOTAL_FLOOR_AREA": "float64",
+    "FLOOR_HEIGHT": "float64",
+    "PROPERTY_TYPE": "object",
+    "BUILT_FORM": "object",
+    "CONSTITUENCY": "object",
+    "NUMBER_HABITABLE_ROOMS": "float64",
+    "NUMBER_HEATED_ROOMS": "float64",
+    "FIXED_LIGHTING_OUTLETS_COUNT": "float64",
+    "CONSTRUCTION_AGE_BAND": "object",
+    "TRANSACTION_TYPE": "object",
+    "WALLS_DESCRIPTION": "object",
+    "FLOOR_DESCRIPTION": "object",
+    "LIGHTING_DESCRIPTION": "object",
+    "ROOF_DESCRIPTION": "object",
+    "MAINHEAT_DESCRIPTION": "object",
+    "HOTWATER_DESCRIPTION": "object",
+    "MAIN_FUEL": "object",
+    "MECHANICAL_VENTILATION": "object",
+    "SECONDHEAT_DESCRIPTION": "object",
+    "ENERGY_TARIFF": "object",
+    "SOLAR_WATER_HEATING_FLAG": "object",
+    "PHOTO_SUPPLY": "float64",
+    "WINDOWS_DESCRIPTION": "object",
+    "GLAZED_TYPE": "object",
+    "MULTI_GLAZE_PROPORTION": "float64",
+    "LOW_ENERGY_LIGHTING": "float64",
+    "NUMBER_OPEN_FIREPLACES": "float64",
+    "MAINHEATCONT_DESCRIPTION": "object",
+    "EXTENSION_COUNT": "float64",
+    "LODGEMENT_DATE": "object",
+    **dict(
+        zip(
+            EFFICIENCY_FEATURES,
+            [
+                "object",
+            ]
+            * len(EFFICIENCY_FEATURES),
+        )
+    ),
+    **dict(
+        zip(
+            POTENTIAL_COLUMNS,
+            [
+                "float64",
+            ]
+            * len(POTENTIAL_COLUMNS),
+        )
+    ),
 }

 # For modelling, we don't allow records with more than 100 SAP points
@ -215,7 +308,7 @@ fill_na_map = {
    "LOW_ENERGY_LIGHTING": 0,
    "MAINHEATCONT_DESCRIPTION": "Unknown",
    "EXTENSION_COUNT": 0,
-    "NUMBER_OPEN_FIREPLACES": 0
+    "NUMBER_OPEN_FIREPLACES": 0,
 }

 ################################################################################################
@ -224,62 +317,212 @@ fill_na_map = {
 ################################################################################################

 STARTING_SUFFIX_COMPONENT_COLS = [
-    "SAP", "HEAT_DEMAND", "CARBON", "TRANSACTION_TYPE", "MECHANICAL_VENTILATION",
-    "SECONDHEAT_DESCRIPTION", "ENERGY_TARIFF", "SOLAR_WATER_HEATING_FLAG", "PHOTO_SUPPLY",
-    "GLAZED_TYPE", "MULTI_GLAZE_PROPORTION", "LOW_ENERGY_LIGHTING", "NUMBER_OPEN_FIREPLACES",
-    "EXTENSION_COUNT", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "DAYS_TO", "estimated_perimeter"
+    "SAP",
+    "HEAT_DEMAND",
+    "CARBON",
+    "TRANSACTION_TYPE",
+    "MECHANICAL_VENTILATION",
+    "SECONDHEAT_DESCRIPTION",
+    "ENERGY_TARIFF",
+    "SOLAR_WATER_HEATING_FLAG",
+    "PHOTO_SUPPLY",
+    "GLAZED_TYPE",
+    "MULTI_GLAZE_PROPORTION",
+    "LOW_ENERGY_LIGHTING",
+    "NUMBER_OPEN_FIREPLACES",
+    "EXTENSION_COUNT",
+    "TOTAL_FLOOR_AREA",
+    "FLOOR_HEIGHT",
+    "DAYS_TO",
+    "estimated_perimeter",
+]
+NO_SUFFIX_COMPONENT_COLS = [
+    "walls_thermal_transmittance",
+    "is_cavity_wall",
+    "is_filled_cavity",
+    "is_solid_brick",
+    "is_system_built",
+    "is_timber_frame",
+    "is_granite_or_whinstone",
+    "is_as_built",
+    "is_cob",
+    "is_sandstone_or_limestone",
+    "is_park_home",
+    "walls_insulation_thickness",
+    "external_insulation",
+    "internal_insulation",
+    "floor_thermal_transmittance",
+    "is_to_unheated_space",
+    "is_to_external_air",
+    "is_suspended",
+    "is_solid",
+    "another_property_below",
+    "floor_insulation_thickness",
+    "roof_thermal_transmittance",
+    "is_pitched",
+    "is_roof_room",
+    "is_loft",
+    "is_flat",
+    "is_thatched",
+    "is_at_rafters",
+    "has_dwelling_above",
+    "roof_insulation_thickness",
+    "heater_type",
+    "system_type",
+    "thermostat_characteristics",
+    "heating_scope",
+    "energy_recovery",
+    "hotwater_tariff_type",
+    "extra_features",
+    "chp_systems",
+    "distribution_system",
+    "no_system_present",
+    "appliance",
+    "has_radiators",
+    "has_fan_coil_units",
+    "has_pipes_in_screed_above_insulation",
+    "has_pipes_in_insulated_timber_floor",
+    "has_pipes_in_concrete_slab",
+    "has_boiler",
+    "has_air_source_heat_pump",
+    "has_room_heaters",
+    "has_electric_storage_heaters",
+    "has_warm_air",
+    "has_electric_underfloor_heating",
+    "has_electric_ceiling_heating",
+    "has_community_scheme",
+    "has_ground_source_heat_pump",
+    "has_no_system_present",
+    "has_portable_electric_heaters",
+    "has_water_source_heat_pump",
+    "has_electric_heat_pump",
+    "has_micro-cogeneration",
+    "has_solar_assisted_heat_pump",
+    "has_exhaust_source_heat_pump",
+    "has_community_heat_pump",
+    "has_electric",
+    "has_mains_gas",
+    "has_wood_logs",
+    "has_coal",
+    "has_oil",
+    "has_wood_pellets",
+    "has_anthracite",
+    "has_dual_fuel_mineral_and_wood",
+    "has_smokeless_fuel",
+    "has_lpg",
+    "has_b30k",
+    "has_electricaire",
+    "has_assumed_for_most_rooms",
+    "has_underfloor_heating",
+    "thermostatic_control",
+    "charging_system",
+    "switch_system",
+    "no_control",
+    "dhw_control",
+    "community_heating",
+    "multiple_room_thermostats",
+    "auxiliary_systems",
+    "trvs",
+    "rate_control",
+    "glazing_type",
+    "fuel_type",
+    "main-fuel_tariff_type",
+    "is_community",
+    "no_individual_heating_or_community_network",
+    "complex_fuel_type",
 ]
-NO_SUFFIX_COMPONENT_COLS = ['walls_thermal_transmittance', 'is_cavity_wall',
-                            'is_filled_cavity', 'is_solid_brick', 'is_system_built', 'is_timber_frame',
-                            'is_granite_or_whinstone', 'is_as_built', 'is_cob', 'is_sandstone_or_limestone',
-                            'is_park_home', 'walls_insulation_thickness', 'external_insulation', 'internal_insulation',
-                            'floor_thermal_transmittance', 'is_to_unheated_space', 'is_to_external_air', 'is_suspended',
-                            'is_solid', 'another_property_below', 'floor_insulation_thickness',
-                            'roof_thermal_transmittance', 'is_pitched', 'is_roof_room', 'is_loft', 'is_flat',
-                            'is_thatched', 'is_at_rafters', 'has_dwelling_above', 'roof_insulation_thickness',
-                            'heater_type', 'system_type', 'thermostat_characteristics', 'heating_scope',
-                            'energy_recovery',
-                            'hotwater_tariff_type', 'extra_features', 'chp_systems', 'distribution_system',
-                            'no_system_present', 'appliance', 'has_radiators', 'has_fan_coil_units',
-                            'has_pipes_in_screed_above_insulation', 'has_pipes_in_insulated_timber_floor',
-                            'has_pipes_in_concrete_slab', 'has_boiler', 'has_air_source_heat_pump', 'has_room_heaters',
-                            'has_electric_storage_heaters', 'has_warm_air', 'has_electric_underfloor_heating',
-                            'has_electric_ceiling_heating', 'has_community_scheme', 'has_ground_source_heat_pump',
-                            'has_no_system_present', 'has_portable_electric_heaters', 'has_water_source_heat_pump',
-                            'has_electric_heat_pump', 'has_micro-cogeneration', 'has_solar_assisted_heat_pump',
-                            'has_exhaust_source_heat_pump', 'has_community_heat_pump', 'has_electric', 'has_mains_gas',
-                            'has_wood_logs', 'has_coal', 'has_oil', 'has_wood_pellets', 'has_anthracite',
-                            'has_dual_fuel_mineral_and_wood', 'has_smokeless_fuel', 'has_lpg', 'has_b30k',
-                            'has_electricaire', 'has_assumed_for_most_rooms', 'has_underfloor_heating',
-                            'thermostatic_control', 'charging_system', 'switch_system', 'no_control', 'dhw_control',
-                            'community_heating', 'multiple_room_thermostats', 'auxiliary_systems', 'trvs',
-                            'rate_control',
-                            'glazing_type', 'fuel_type', 'main-fuel_tariff_type', 'is_community',
-                            'no_individual_heating_or_community_network', 'complex_fuel_type',
-                            ]

 ENDING_SUFFIX_COMPONENT_COLS = [
-    'SAP', 'HEAT_DEMAND', 'CARBON', 'TRANSACTION_TYPE', 'MECHANICAL_VENTILATION', 'SECONDHEAT_DESCRIPTION',
-    'ENERGY_TARIFF', 'SOLAR_WATER_HEATING_FLAG', 'PHOTO_SUPPLY', 'GLAZED_TYPE', 'MULTI_GLAZE_PROPORTION',
-    'LOW_ENERGY_LIGHTING', 'NUMBER_OPEN_FIREPLACES', 'EXTENSION_COUNT', 'TOTAL_FLOOR_AREA', 'FLOOR_HEIGHT',
-    'DAYS_TO', 'walls_thermal_transmittance', 'is_park_home', 'walls_insulation_thickness',
-    'external_insulation', 'internal_insulation', 'floor_thermal_transmittance', 'floor_insulation_thickness',
-    'roof_thermal_transmittance', 'roof_insulation_thickness', 'heater_type', 'system_type',
-    'thermostat_characteristics', 'heating_scope', 'energy_recovery', 'hotwater_tariff_type', 'extra_features',
-    'chp_systems', 'distribution_system', 'no_system_present', 'appliance', 'has_radiators',
-    'has_fan_coil_units', 'has_pipes_in_screed_above_insulation', 'has_pipes_in_insulated_timber_floor',
-    'has_pipes_in_concrete_slab', 'has_boiler', 'has_air_source_heat_pump', 'has_room_heaters',
-    'has_electric_storage_heaters', 'has_warm_air', 'has_electric_underfloor_heating',
-    'has_electric_ceiling_heating', 'has_community_scheme', 'has_ground_source_heat_pump',
-    'has_no_system_present', 'has_portable_electric_heaters', 'has_water_source_heat_pump',
-    'has_electric_heat_pump', 'has_micro-cogeneration', 'has_solar_assisted_heat_pump',
-    'has_exhaust_source_heat_pump', 'has_community_heat_pump', 'has_electric', 'has_mains_gas', 'has_wood_logs',
-    'has_coal', 'has_oil', 'has_wood_pellets', 'has_anthracite', 'has_dual_fuel_mineral_and_wood',
-    'has_smokeless_fuel', 'has_lpg', 'has_b30k', 'has_electricaire', 'has_assumed_for_most_rooms',
-    'has_underfloor_heating', 'thermostatic_control', 'charging_system', 'switch_system', 'no_control',
-    'dhw_control', 'community_heating', 'multiple_room_thermostats', 'auxiliary_systems', 'trvs',
-    'rate_control', 'glazing_type', 'fuel_type', 'main-fuel_tariff_type', 'is_community',
-    'no_individual_heating_or_community_network', 'complex_fuel_type', 'estimated_perimeter'
+    "SAP",
+    "HEAT_DEMAND",
+    "CARBON",
+    "TRANSACTION_TYPE",
+    "MECHANICAL_VENTILATION",
+    "SECONDHEAT_DESCRIPTION",
+    "ENERGY_TARIFF",
+    "SOLAR_WATER_HEATING_FLAG",
+    "PHOTO_SUPPLY",
+    "GLAZED_TYPE",
+    "MULTI_GLAZE_PROPORTION",
+    "LOW_ENERGY_LIGHTING",
+    "NUMBER_OPEN_FIREPLACES",
+    "EXTENSION_COUNT",
+    "TOTAL_FLOOR_AREA",
+    "FLOOR_HEIGHT",
+    "DAYS_TO",
+    "walls_thermal_transmittance",
+    "is_park_home",
+    "walls_insulation_thickness",
+    "external_insulation",
+    "internal_insulation",
+    "floor_thermal_transmittance",
+    "floor_insulation_thickness",
+    "roof_thermal_transmittance",
+    "roof_insulation_thickness",
+    "heater_type",
+    "system_type",
+    "thermostat_characteristics",
+    "heating_scope",
+    "energy_recovery",
+    "hotwater_tariff_type",
+    "extra_features",
+    "chp_systems",
+    "distribution_system",
+    "no_system_present",
+    "appliance",
+    "has_radiators",
+    "has_fan_coil_units",
+    "has_pipes_in_screed_above_insulation",
+    "has_pipes_in_insulated_timber_floor",
+    "has_pipes_in_concrete_slab",
+    "has_boiler",
+    "has_air_source_heat_pump",
+    "has_room_heaters",
+    "has_electric_storage_heaters",
+    "has_warm_air",
+    "has_electric_underfloor_heating",
+    "has_electric_ceiling_heating",
+    "has_community_scheme",
+    "has_ground_source_heat_pump",
+    "has_no_system_present",
+    "has_portable_electric_heaters",
+    "has_water_source_heat_pump",
+    "has_electric_heat_pump",
+    "has_micro-cogeneration",
+    "has_solar_assisted_heat_pump",
+    "has_exhaust_source_heat_pump",
+    "has_community_heat_pump",
+    "has_electric",
+    "has_mains_gas",
+    "has_wood_logs",
+    "has_coal",
+    "has_oil",
+    "has_wood_pellets",
+    "has_anthracite",
+    "has_dual_fuel_mineral_and_wood",
+    "has_smokeless_fuel",
+    "has_lpg",
+    "has_b30k",
+    "has_electricaire",
+    "has_assumed_for_most_rooms",
+    "has_underfloor_heating",
+    "thermostatic_control",
+    "charging_system",
+    "switch_system",
+    "no_control",
+    "dhw_control",
+    "community_heating",
+    "multiple_room_thermostats",
+    "auxiliary_systems",
+    "trvs",
+    "rate_control",
+    "glazing_type",
+    "fuel_type",
+    "main-fuel_tariff_type",
+    "is_community",
+    "no_individual_heating_or_community_network",
+    "complex_fuel_type",
+    "estimated_perimeter",
 ]

 # We found that without performing any filtering, the bottom 0.5% of homes had a floor height of 1.65m. We'll therefore
--- a/etl/epc/testfile.csv
+++ b/etl/epc/testfile.csv
--- a/etl/epc/tests/test_epcrecord.py
+++ b/etl/epc/tests/test_epcrecord.py
@ -0,0 +1,358 @@
+import pytest
+from utils.s3 import read_dataframe_from_s3_parquet
+from etl.epc.Record import EPCRecord
+from etl.epc.settings import DATA_ANOMALY_MATCHES
+import random
+
+
+class TestEpcRecord:
+
+    @pytest.fixture()
+    def cleaning_data(self):
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        return cleaning_data
+
+    @pytest.fixture()
+    def epc_records_1(self):
+        epc_records_1 = {
+            'original_epc': {
+                'low-energy-fixed-light-count': '', 'address': '139 School Road, Hall Green',
+                'uprn-source': 'Energy Assessor', 'floor-height': '2.6', 'heating-cost-potential': '1138',
+                'unheated-corridor-length': '', 'hot-water-cost-potential': '175',
+                'construction-age-band': 'England and Wales: 1900-1929', 'potential-energy-rating': 'B',
+                'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
+                'environment-impact-potential': '82', 'glazed-type': 'double glazing, unknown install date',
+                'heating-cost-current': '2711', 'address3': '',
+                'mainheatcont-description': 'Programmer, TRVs and bypass',
+                'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Birmingham',
+                'fixed-lighting-outlets-count': '11', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+                'hot-water-cost-current': '310', 'county': '', 'postcode': 'B28 8JF', 'solar-water-heating-flag': 'N',
+                'constituency': 'E14000562', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4',
+                'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '107',
+                'local-authority': 'E08000025', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+                'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2023-07-05',
+                'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '65', 'address1': '139 School Road',
+                'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Birmingham, Hall Green',
+                'roof-energy-eff': 'Average', 'total-floor-area': '103.0', 'building-reference-number': '10004697322',
+                'environment-impact-current': '43', 'co2-emissions-current': '6.7',
+                'roof-description': 'Pitched, 100 mm loft insulation', 'floor-energy-eff': 'N/A',
+                'number-habitable-rooms': '4', 'address2': 'Hall Green', 'hot-water-env-eff': 'Good',
+                'posttown': 'BIRMINGHAM', 'mainheatc-energy-eff': 'Average', 'main-fuel': 'mains gas (not community)',
+                'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
+                'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 82% of fixed outlets',
+                'roof-env-eff': 'Average', 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0',
+                'lighting-cost-potential': '182', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+                'main-heating-controls': '', 'lodgement-datetime': '2023-07-13 08:23:07', 'flat-top-storey': '',
+                'current-energy-rating': 'E', 'secondheat-description': 'None', 'walls-env-eff': 'Very Poor',
+                'transaction-type': 'rental', 'uprn': '100070505235', 'current-energy-efficiency': '51',
+                'energy-consumption-current': '366', 'mainheat-description': 'Boiler and radiators, mains gas',
+                'lighting-cost-current': '182', 'lodgement-date': '2023-07-13', 'extension-count': '0',
+                'mainheatc-env-eff': 'Average',
+                'lmk-key': 'c1d137711da433fb3cced74b1a6848da8bbc1159d076455d26d7b4668982601e',
+                'wind-turbine-count': '0',
+                'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '84',
+                'hot-water-energy-eff': 'Good', 'low-energy-lighting': '82',
+                'walls-description': 'Solid brick, as built, no insulation (assumed)',
+                'hotwater-description': 'From main system'}, 'full_sap_epc': {}, 'old_data': []
+        }
+        return epc_records_1
+
+    def test_clean_mechanical_ventilation(self, cleaning_data, epc_records_1):
+        # We have an epc with natural ventilation - the resulting epc should also have natural ventilation
+
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "mechanical-ventilation": "natural"
+        }
+        record._clean_ventilation()
+
+        assert record.prepared_epc["mechanical-ventilation"] == "natural"
+
+        record2 = EPCRecord(cleaning_data=cleaning_data)
+        record2.prepared_epc = {
+            "mechanical-ventilation": ""
+        }
+
+        record2._clean_ventilation()
+
+        assert record2.prepared_epc["mechanical-ventilation"] is None
+
+        record3 = EPCRecord(cleaning_data=cleaning_data)
+        record3.prepared_epc = {
+            "mechanical-ventilation": None
+        }
+
+        record3._clean_ventilation()
+
+        assert record3.prepared_epc["mechanical-ventilation"] is None
+
+        record4 = EPCRecord(cleaning_data=cleaning_data)
+        record4.prepared_epc = {
+            "mechanical-ventilation": "INVALID"
+        }
+
+        record4._clean_ventilation()
+
+        assert record4.prepared_epc["mechanical-ventilation"] is None
+
+    def test_clean_energy_valid_values(self, cleaning_data, epc_records_1):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "energy-consumption-current": "200",
+            "co2-emissions-current": "5.5"
+        }
+        record._clean_energy()
+
+        assert record.prepared_epc["energy-consumption-current"] == 200.0
+        assert record.prepared_epc["co2-emissions-current"] == 5.5
+
+    def test_clean_energy_empty_values(self, cleaning_data):
+        # We cannot have invalid values so this should raise an exception
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "energy-consumption-current": "",
+            "co2-emissions-current": ""
+        }
+
+        with pytest.raises(ValueError):
+            record._clean_energy()
+
+    def test_clean_built_form_valid_remap(self, cleaning_data, epc_records_1):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        # A built form that is already valid ("Semi-Detached") should be left unchanged by the cleaning step
+        record.prepared_epc = {
+            "built-form": "Semi-Detached",
+            "property-type": "Flat"  # Assuming this affects the remapping
+        }
+        record._clean_built_form()
+
+        assert record.prepared_epc["built-form"] == "Semi-Detached"
+
+    def test_clean_built_form_anomaly(self, cleaning_data, epc_records_1):
+        record = EPCRecord(cleaning_data=cleaning_data)
+
+        record.prepared_epc = {
+            "built-form": "",
+            "property-type": "Flat"
+        }
+        record._clean_built_form()
+
+        assert record.prepared_epc["built-form"] == "End-Terrace"
+
+    def test_clean_floor_area_valid(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "total-floor-area": "120.5"
+        }
+        record._clean_floor_area()
+
+        assert record.prepared_epc["total-floor-area"] == 120.5
+
+    def test_clean_floor_area_empty(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "total-floor-area": ""
+        }
+        # We have no known case of missing floor area
+        with pytest.raises(ValueError):
+            record._clean_floor_area()
+
+    def test_clean_heat_loss_corridor_valid(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "heat-loss-corridor": "unheated corridor",
+            "unheated-corridor-length": ""
+        }
+        record._clean_heat_loss_corridor()
+
+        assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor"
+
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "heat-loss-corridor": "unheated corridor",
+            "unheated-corridor-length": None
+        }
+        record._clean_heat_loss_corridor()
+
+        assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor"
+        assert record.prepared_epc["unheated-corridor-length"] is None
+
+    def test_clean_heat_loss_corridor_anomaly(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        # Assuming "InvalidCorridor" is an anomaly
+        record.prepared_epc = {
+            "heat-loss-corridor": "InvalidCorridor",
+            "unheated-corridor-length": ""
+        }
+        record._clean_heat_loss_corridor()
+
+        assert record.prepared_epc["heat-loss-corridor"] == "no corridor"
+
+    def test_clean_mains_gas_valid(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "mains-gas-flag": "Y"
+        }
+        record._clean_mains_gas()
+
+        assert record.prepared_epc["mains-gas-flag"] is True
+
+    def test_clean_mains_gas_anomaly(self, cleaning_data):
+        """An unrecognised flag raises KeyError; every known anomaly value is cleaned to None."""
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {"mains-gas-flag": "InvalidValue"}
+        # It should always be Y or N or an anomaly value
+        with pytest.raises(KeyError):
+            record._clean_mains_gas()
+
+        # Exercise every anomaly value instead of a random pick, so the test is
+        # deterministic and a failure for one specific value is always reproduced.
+        for anomaly in DATA_ANOMALY_MATCHES:
+            record = EPCRecord(cleaning_data=cleaning_data)
+            record.prepared_epc = {"mains-gas-flag": anomaly}
+            record._clean_mains_gas()
+
+            assert record.prepared_epc["mains-gas-flag"] is None
+
+    def test_clean_solar_hot_water_valid(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "solar-water-heating-flag": "Y"
+        }
+        record._clean_solar_hot_water()
+
+        assert record.prepared_epc["solar-water-heating-flag"] == "Y"
+        assert record.solar_water_heating_flag_bool is True
+
+    def test_clean_solar_hot_water_empty(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.prepared_epc = {
+            "solar-water-heating-flag": ""
+        }
+        record._clean_solar_hot_water()
+
+        assert record.prepared_epc["solar-water-heating-flag"] == "N"
+        assert record.solar_water_heating_flag_bool is False
+
+    def test_clean_number_lighting_outlets_valid(self, cleaning_data, epc_records_1):
+        record = EPCRecord(cleaning_data=cleaning_data, epc_records=epc_records_1)
+        record.prepared_epc = {
+            "fixed-lighting-outlets-count": "5"
+        }
+        record._clean_number_lighting_outlets()
+
+        assert record.prepared_epc["fixed-lighting-outlets-count"] == 5.0
+
+    def test_clean_number_lighting_outlets_empty(self, cleaning_data, epc_records_1):
+        record = EPCRecord(cleaning_data=cleaning_data)
+        record.run_mode = "newdata"
+        record.prepared_epc = {
+            "fixed-lighting-outlets-count": "",
+            "property-type": "Flat",
+            "built-form": "Semi-Detached",
+            "construction-age-band": "England and Wales: 1900-1929",
+            "local-authority": "E08000025",
+            "number-habitable-rooms": "4",
+            "number-heated-rooms": "4",
+        }
+        record.old_data = []
+        record.full_sap_epc = []
+        record._clean_number_lighting_outlets()
+
+        assert record.prepared_epc["fixed-lighting-outlets-count"] == 8.0
+
+    def test_clean_count_variables(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+
+        record.prepared_epc = {
+            "number-open-fireplaces": "1",
+            "extension-count": None,
+            "flat-storey-count": "",
+            "number-habitable-rooms": "INVALID!",
+        }
+
+        record._clean_count_variables()
+
+        assert record.prepared_epc["number-open-fireplaces"] == 1.0
+        assert record.prepared_epc["extension-count"] == 0
+        assert record.prepared_epc["flat-storey-count"] is None
+        assert record.prepared_epc["number-habitable-rooms"] is None
+
+    def test_clean_floor_level(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+
+        record.prepared_epc = {
+            "floor-level": "1",
+        }
+
+        record._clean_floor_level()
+
+        assert record.prepared_epc["floor-level"] == 1.0
+
+        record = EPCRecord(cleaning_data=cleaning_data)
+
+        record.prepared_epc = {
+            "floor-level": "",
+        }
+
+        record._clean_floor_level()
+
+        assert record.prepared_epc["floor-level"] is None
+
+        record = EPCRecord(cleaning_data=cleaning_data)
+
+        record.prepared_epc = {
+            "floor-level": None,
+        }
+
+        record._clean_floor_level()
+
+        assert record.prepared_epc["floor-level"] is None
+
+    def test_clean_solar_hot_water(self, cleaning_data):
+        record = EPCRecord(cleaning_data=cleaning_data)
+
+        record.prepared_epc = {
+            "solar-water-heating-flag": "Y",
+        }
+
+        record._clean_solar_hot_water()
+
+        assert record.prepared_epc["solar-water-heating-flag"] == "Y"
+        assert record.solar_water_heating_flag_bool is True
+
+        record = EPCRecord(cleaning_data=cleaning_data)
+
+        record.prepared_epc = {
+            "solar-water-heating-flag": "N",
+        }
+
+        record._clean_solar_hot_water()
+
+        assert record.prepared_epc["solar-water-heating-flag"] == "N"
+        assert record.solar_water_heating_flag_bool is False
+
+        record = EPCRecord(cleaning_data=cleaning_data)
+
+        record.prepared_epc = {
+            "solar-water-heating-flag": "",
+        }
+
+        record._clean_solar_hot_water()
+
+        assert record.prepared_epc["solar-water-heating-flag"] == "N"
+        assert record.solar_water_heating_flag_bool is False
+
+        record = EPCRecord(cleaning_data=cleaning_data)
+
+        record.prepared_epc = {
+            "solar-water-heating-flag": None,
+        }
+
+        record._clean_solar_hot_water()
+
+        assert record.prepared_epc["solar-water-heating-flag"] == "N"
+        assert record.solar_water_heating_flag_bool is False
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@ -35,9 +35,12 @@ def app():

    cleaned_data = {}
    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
-    for directory in tqdm(epc_directories):

+    WALLS = []
+    for directory in tqdm(epc_directories):
        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        z = data["WALLS_DESCRIPTION"].unique().tolist()
+        WALLS.extend(z)
        # Rename the columns to the same format as the api returns
        data.columns = [c.replace("_", "-").lower() for c in data.columns]
        # Take just date before the date threshold
--- a/etl/epc_clean/epc_attributes/MainheatAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py
@ -61,7 +61,8 @@ class MainHeatAttributes(Definitions):
    REMAP = {
        "electric ceiling": "electric ceiling heating",
        "electric heat pumps": "electric heat pump",
-        "solar-assisted heat pump": "solar assisted heat pump"
+        "solar-assisted heat pump": "solar assisted heat pump",
+        "portable electric heating": "portable electric heaters",
    }

    edge_case_result = {}
@ -138,6 +139,8 @@ class MainHeatAttributes(Definitions):
        result.update({f'has_{ft.replace(" ", "_")}': False for ft in self.FUEL_TYPES})
        result.update({f'has_{ot.replace(" ", "_")}': False for ot in self.OTHERS})
        result['has_underfloor_heating'] = False
+        # We re-map entries that are the same
+        # We just drop those keys

        if self.nodata:
            return result
--- a/etl/epc_clean/epc_attributes/RoofAttributes.py
+++ b/etl/epc_clean/epc_attributes/RoofAttributes.py
@ -33,6 +33,12 @@ class RoofAttributes(Definitions):
        "ystafell(oedd) to, dim inswleiddio": "roof room(s), no insulation",
    }

+    DEFAULT_KEYS = [
+        'thermal_transmittance', 'thermal_transmittance_unit', 'is_pitched', 'is_roof_room',
+        'is_loft', 'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed', 'has_dwelling_above',
+        'is_valid', 'insulation_thickness'
+    ]
+
    def __init__(self, description: str):
        """
        :param description: Description of the roof.
@ -95,6 +101,8 @@ class RoofAttributes(Definitions):
        result: Dict[str, Union[float, str, bool, None]] = {}

        if self.nodata:
+            for key in self.DEFAULT_KEYS:
+                result[key] = False
            return result

        description = self.description
@ -114,6 +122,13 @@ class RoofAttributes(Definitions):
        result["is_valid"] = "invalid" not in description
        description = description.replace("invalid", "")

+        # We handle an edge case where the description is "pitched, 150  loft insulation" and is missing the mm
+        if result["is_pitched"] or result["is_loft"]:
+            # Search for a regular expression that matches 150   insulation
+            match = re.search(r"(\d+\+?)\s*insulation", description)
+            if match:
+                result['insulation_thickness'] = match.group(1)
+
        # insulation thickness
        thickness_map = {
            "ceiling insulated": "average",
@ -129,11 +144,11 @@ class RoofAttributes(Definitions):
                # Remove the match from the description
                # description = description.replace(key, "")
                break
-        else:
-            # Extract insulation thickness in mm, if present
-            match = re.search(r'(\d+\+?)\s*mm', description)
-            if match:
-                result['insulation_thickness'] = match.group(1)
+
+        # Extract insulation thickness in mm, if present
+        match = re.search(r'(\d+\+?)\s*mm', description)
+        if match:
+            result['insulation_thickness'] = match.group(1)

        if "insulation_thickness" not in result:
            result['insulation_thickness'] = None
--- a/etl/epc_clean/epc_attributes/WallAttributes.py
+++ b/etl/epc_clean/epc_attributes/WallAttributes.py
@ -68,6 +68,13 @@ class WallAttributes(Definitions):
        'Cowith external insulation': 'Cob, with external insulation',
    }

+    DEFAULT_KEYS = [
+        'thermal_transmittance', 'thermal_transmittance_unit', 'is_cavity_wall', 'is_filled_cavity',
+        'is_solid_brick', 'is_system_built', 'is_timber_frame', 'is_granite_or_whinstone',
+        'is_as_built', 'is_cob', 'is_assumed', 'is_sandstone_or_limestone',
+        'insulation_thickness', 'external_insulation', 'internal_insulation'
+    ]
+
    def __init__(self, description: str):
        """
        :param description: Description of the walls.
@ -98,6 +105,9 @@ class WallAttributes(Definitions):
    def process(self) -> Dict[str, Union[float, str, bool, None]]:
        result: Dict[str, Union[float, str, bool, None]] = {}
        if self.nodata:
+            for key in self.DEFAULT_KEYS:
+                result[key] = False
+
            return result

        description = self.description.lower()
@ -142,4 +152,7 @@ class WallAttributes(Definitions):
            else:
                result["insulation_thickness"] = "average"

+        if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"):
+            result["is_filled_cavity"] = True
+
        return result
--- a/etl/epc_clean/epc_attributes/WindowAttributes.py
+++ b/etl/epc_clean/epc_attributes/WindowAttributes.py
@ -52,7 +52,7 @@ class WindowAttributes(Definitions):
                raise ValueError('Invalid description')

    def process(self) -> Dict[str, Union[str, bool]]:
-        result: Dict[str, Union[str, bool]] = {
+        result: Dict[str, Union[str, bool, None]] = {
            "has_glazing": False,
            "glazing_coverage": None,
            "glazing_type": None,
@ -80,7 +80,11 @@ class WindowAttributes(Definitions):
                        break

        # If we didn't find any coverage or type, we assume full coverage
-        if not result["glazing_coverage"]:
+        if (not result["glazing_coverage"]) & (result["glazing_type"] != "single"):
            result["glazing_coverage"] = "full"

+        # We reset some values if the glazing is single
+        if result["glazing_type"] == "single":
+            result["has_glazing"] = False
+
        return result
--- a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py
@ -1652,4 +1652,17 @@ mainheat_cases = [
     'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
     "has_electric_heat_pumps": False,
     "has_micro-cogeneration": False},
+    {'original_description': 'Portable electric heating assumed for most rooms', 'has_radiators': False,
+     'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
+     'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
+     'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
+     'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
+     'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
+     'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False,
+     'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False,
+     'has_community_heat_pump': False, 'has_portable_electric_heating': True, 'has_electric': True,
+     'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
+     'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
+     'has_b30k': False, 'has_assumed': True, 'has_electricaire': False, 'has_assumed_for_most_rooms': True,
+     'has_underfloor_heating': False}
 ]
--- a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
@ -550,7 +550,7 @@ wall_cases = [
     'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False,
     'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False},
    {'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None,
-     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
     'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
     'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
     'external_insulation': False, 'internal_insulation': False},
@ -727,7 +727,7 @@ wall_cases = [
     'external_insulation': False, 'internal_insulation': False},
    {'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
     'thermal_transmittance': None,
-     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
     'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
     'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
     'external_insulation': False, 'internal_insulation': False},
--- a/etl/epc_clean/tests/test_data/test_window_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_window_attributes_cases.py
@ -30,7 +30,8 @@ windows_cases = [
     'glazing_type': 'triple', 'no_data': False},
    {'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial',
     'glazing_type': 'triple', 'no_data': False},
-    {'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
+    {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
+     'glazing_type': 'single',
     'no_data': False},
    {'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
     'glazing_type': 'double', 'no_data': False},
@ -46,7 +47,8 @@ windows_cases = [
     'glazing_type': 'double', 'no_data': False},
    {'original_description': 'Gwydrau dwbl gan mwyaf', 'has_glazing': True, 'glazing_coverage': 'most',
     'glazing_type': 'double', 'no_data': False},
-    {'original_description': 'Gwydrau sengl', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
+    {'original_description': 'Gwydrau sengl', 'has_glazing': False, 'glazing_coverage': None,
+     'glazing_type': 'single',
     'no_data': False},
    {'original_description': 'Ffenestri perfformiad uchel', 'has_glazing': True, 'glazing_coverage': 'full',
     'glazing_type': 'high performance', 'no_data': False},
--- a/etl/epc_clean/tests/test_roof_attributes.py
+++ b/etl/epc_clean/tests/test_roof_attributes.py
@ -3,12 +3,13 @@ from pathlib import Path
 from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases
 from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes

+
 # For local testing
-if __file__ == "<input>":
-    input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
-else:
-    current_file_path = Path(__file__)
-    input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
+# if __file__ == "<input>":
+#     input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
+# else:
+#     current_file_path = Path(__file__)
+#     input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'


 class TestRoofAttributes:
@ -88,7 +89,12 @@ class TestRoofAttributes:

    def test_clean_roof_no_description(self):
        roof = RoofAttributes('').process()
-        assert roof == {}
+        assert roof == {
+            'thermal_transmittance': False, 'thermal_transmittance_unit': False, 'is_pitched': False,
+            'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False,
+            'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': False,
+            'insulation_thickness': False
+        }

    def test_clean_roof_edge_cases(self):
        # Insulation thickness edge case
--- a/etl/property_dimensions/app.py
+++ b/etl/property_dimensions/app.py
@ -7,7 +7,7 @@ from pathlib import Path
 import pandas as pd
 from tqdm import tqdm
 from etl.epc.settings import EARLIEST_EPC_DATE
-from etl.epc.DataProcessor import DataProcessor
+from etl.epc.DataProcessor import EPCDataProcessor
 from BaseUtility import Definitions
 from utils.s3 import save_dataframe_to_s3_parquet

@ -21,24 +21,31 @@ BUCKET = os.environ.get("BUCKET", "retrofit-data-dev")
 def app():
    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]

+    sample = []
    for directory in tqdm(directories):
+
        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+
        data = data[data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
        data = data[~pd.isnull(data["UPRN"])]
        data["TOTAL_FLOOR_AREA"] = data["TOTAL_FLOOR_AREA"].astype(float)

        data["CONSTRUCTION_AGE_BAND"] = data["CONSTRUCTION_AGE_BAND"].apply(
-            lambda x: DataProcessor.clean_construction_age_band(x)
+            lambda x: EPCDataProcessor.clean_construction_age_band(x)
        )
        data = data[~pd.isnull(data["CONSTRUCTION_AGE_BAND"])]
        data = data[~data["CONSTRUCTION_AGE_BAND"].isin(Definitions.DATA_ANOMALY_MATCHES)]
        data = data[~pd.isnull(data["TOTAL_FLOOR_AREA"])]
        data = data[~pd.isnull(data["NUMBER_HABITABLE_ROOMS"])]
        data = data[~pd.isnull(data["FLOOR_HEIGHT"])]
+        data = data[~pd.isnull(data["NUMBER_HEATED_ROOMS"])]

        df = (
            data.groupby(GROUPBY)
-            .agg({"NUMBER_HABITABLE_ROOMS": "median", "TOTAL_FLOOR_AREA": "mean", "FLOOR_HEIGHT": "mean"})
+            .agg(
+                {"NUMBER_HEATED_ROOMS": "median", "NUMBER_HABITABLE_ROOMS": "median", "TOTAL_FLOOR_AREA": "mean",
+                 "FLOOR_HEIGHT": "mean"}
+            )
            .reset_index()
        )

--- a/etl/solar/SolarPhotoSupply.py
+++ b/etl/solar/SolarPhotoSupply.py
@ -0,0 +1,244 @@
+import pandas as pd
+from tqdm import tqdm
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class SolarPhotoSupply:
+    DATASET_COLUMNS = [
+        "UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA",
+        "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"
+    ]
+
+    def __init__(self, file_directories, cleaned_lookup):
+        """
+        Set up the builder with its inputs and with empty containers for its outputs. Only locally stored data
+        is handled at present; reading the certificate files from S3 is not implemented.
+
+        :param file_directories: The directories to read certificate files from.
+        :param cleaned_lookup: A dictionary of cleaned lookup data. Only its "roof-description" entry is used.
+        """
+        # Inputs
+        self.file_directories = file_directories
+        roof_records = cleaned_lookup.get("roof-description")
+        self.roof_lookup = pd.DataFrame(roof_records)
+
+        # Intermediate state, populated by create_dataset()
+        self.results, self.decile_thresholds = [], None
+
+        # Outputs, populated by create_dataset()
+        self.photo_supply_lookup = pd.DataFrame()
+        self.floor_area_decile_thresholds = pd.DataFrame()
+
+    def create_dataset(self):
+        """
+        Build the photo supply lookup and the floor area decile thresholds from the certificate files.
+
+        For every directory in ``self.file_directories`` the "certificates.csv" file is read, reduced to the
+        newest certificate per UPRN and filtered to rows with a non-zero PHOTO_SUPPLY. The combined rows are
+        bucketed into floor area deciles, joined to the roof lookup and aggregated into
+        ``self.photo_supply_lookup`` (median and mean PHOTO_SUPPLY per group). The decile thresholds are stored
+        in ``self.floor_area_decile_thresholds``.
+
+        :raises ValueError: If there is no roof lookup data, or if no certificate files were read.
+        """
+
+        if self.roof_lookup.empty:
+            raise ValueError("No roof lookup data")
+
+        results = []
+
+        logger.info("Creating solar photo supply dataset")
+        # The loop variable is deliberately not called ``dir`` so that the builtin is not shadowed
+        for directory in tqdm(self.file_directories):
+            filepath = directory / "certificates.csv"
+            df = pd.read_csv(filepath, low_memory=False)
+            df = df[~pd.isnull(df["UPRN"])]
+            df["UPRN"] = df["UPRN"].astype(int).astype(str)
+            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
+            for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
+                df = df[~pd.isnull(df[col])]
+            # Take newest LODGEMENT_DATE per UPRN
+            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
+
+            data = df[self.DATASET_COLUMNS].copy()
+            # A missing PHOTO_SUPPLY is treated the same as 0, and only non-zero rows are kept
+            data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0)
+            data = data[data["PHOTO_SUPPLY"] != 0]
+            results.append(data)
+
+        if not results:
+            # pd.concat would otherwise fail with a less helpful "No objects to concatenate" ValueError
+            raise ValueError("No certificate data found in the provided file directories")
+
+        self.results = pd.concat(results)
+
+        # Convert total floor area to deciles
+        self.decile_thresholds = self.results["TOTAL_FLOOR_AREA"].quantile(
+            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+        ).values
+
+        # Bins run from 0 to infinity; labels=False yields the integer bucket index rather than an interval
+        self.results["floor_area_decile"] = pd.cut(
+            self.results["TOTAL_FLOOR_AREA"],
+            bins=[0] + list(self.decile_thresholds) + [float('inf')],
+            labels=False,
+            include_lowest=True
+        )
+
+        # Convert tenure to lower
+        self.results["TENURE"] = self.results["TENURE"].str.lower()
+
+        # Attach the roof lookup columns that remain after the drop, matching on the raw roof description
+        self.results = self.results.merge(
+            self.roof_lookup.drop(
+                columns=[
+                    "clean_description", "thermal_transmittance", "thermal_transmittance_unit", "insulation_thickness",
+                    "is_assumed"
+                ]
+            ),
+            left_on="ROOF_DESCRIPTION",
+            right_on="original_description",
+            how="left"
+        )
+
+        self.photo_supply_lookup = self.results.groupby(
+            [
+                "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat",
+                "CONSTRUCTION_AGE_BAND", "floor_area_decile"
+            ],
+            observed=True
+        ).agg(
+            {
+                "PHOTO_SUPPLY": ["median", "mean"],
+            }
+        ).reset_index()
+
+        # The aggregation produces two-level column names, which are flattened by joining the levels with "_"
+        self.photo_supply_lookup.columns = ['_'.join(col).strip() for col in self.photo_supply_lookup.columns.values]
+        # Remove trailing underscore from columns
+        self.photo_supply_lookup.columns = [
+            col[:-1] if col.endswith("_") else col for col in self.photo_supply_lookup.columns.values
+        ]
+        # Convert columns to lowercase
+        self.photo_supply_lookup.columns = [col.lower() for col in self.photo_supply_lookup.columns.values]
+
+        self.floor_area_decile_thresholds = pd.DataFrame(
+            self.decile_thresholds,
+            columns=["floor_area_decile_thresholds"]
+        )
+
+    @staticmethod
+    def classify_floor_area(new_area, thresholds):
+        """
+        Find the decile bucket that a floor area falls into.
+
+        The bucket is the position of the first threshold that is greater than or equal to the area. An area
+        larger than every threshold falls into the final bucket, ``len(thresholds)``.
+
+        :param new_area: The floor area to classify.
+        :param thresholds: The upper bounds of the buckets, checked in the order given.
+        :return: The bucket index, between 0 and ``len(thresholds)`` inclusive.
+        """
+        matching_indices = (index for index, upper_bound in enumerate(thresholds) if new_area <= upper_bound)
+        bucket = next(matching_indices, None)
+        if bucket is None:
+            # No threshold is large enough, so the area belongs to the open-ended top bucket
+            bucket = len(thresholds)
+        return bucket
+
+    def save(self, bucket="retrofit-data-dev"):
+        """
+        Store the photo supply lookup and the floor area decile thresholds in S3 as parquet files.
+
+        :param bucket: The name of the S3 bucket to write to. The default is the bucket that was previously
+            hard-coded here, so existing callers are unaffected. Pass the same bucket that is later given to
+            ``load``.
+        :raises ValueError: If the photo supply lookup is empty, i.e. there is nothing to save.
+        """
+        if self.photo_supply_lookup.empty:
+            raise ValueError("No data to save")
+
+        logger.info("Storing outputs to S3")
+
+        # Both files are written under the same keys that ``load`` reads from
+        save_dataframe_to_s3_parquet(
+            df=self.photo_supply_lookup,
+            bucket_name=bucket,
+            file_key="solar_pv_supply/photo_supply_lookup.parquet",
+        )
+
+        save_dataframe_to_s3_parquet(
+            df=self.floor_area_decile_thresholds,
+            bucket_name=bucket,
+            file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
+        )
+
+    @staticmethod
+    def load(bucket):
+        """
+        Read the two stored solar PV datasets back from an S3 bucket.
+
+        :param bucket: The name of the S3 bucket holding the parquet files.
+        :return: A tuple of (photo supply lookup, floor area decile thresholds), each being whatever
+            ``read_dataframe_from_s3_parquet`` returns for the corresponding file.
+        """
+        lookup_key = "solar_pv_supply/photo_supply_lookup.parquet"
+        thresholds_key = "solar_pv_supply/floor_area_decile_thresholds.parquet"
+
+        # The lookup is read first, then the thresholds
+        return (
+            read_dataframe_from_s3_parquet(bucket_name=bucket, file_key=lookup_key),
+            read_dataframe_from_s3_parquet(bucket_name=bucket, file_key=thresholds_key),
+        )
+
+    @classmethod
+    def filter_photo_supply_lookup(
+        cls,
+        photo_supply_lookup: pd.DataFrame,
+        floor_area_decile_thresholds: pd.DataFrame,
+        tenure: str,
+        built_form: str,
+        property_type: str,
+        construction_age_band: str,
+        is_flat: bool,
+        is_pitched: bool,
+        is_roof_room: bool,
+        floor_area: float
+    ):
+        """
+        Select the rows of the photo supply lookup that best describe a given property.
+
+        An exact match on every property attribute is tried first. If that finds nothing, the match is relaxed
+        to tenure, built form and property type, and narrowed by construction age band only when that band is
+        present among the relaxed matches. Finally the result is narrowed to the property's floor area decile,
+        again only when that decile is present.
+
+        :param photo_supply_lookup: The photo supply lookup dataframe.
+        :param floor_area_decile_thresholds: The floor area decile thresholds dataframe.
+        :param tenure: The tenure of the property.
+        :param built_form: The built form of the property.
+        :param property_type: The property type of the property.
+        :param construction_age_band: The construction age band of the property.
+        :param is_flat: Whether the property has a flat roof.
+        :param is_pitched: Whether the property has a pitched roof.
+        :param is_roof_room: Whether the property has a roof room.
+        :param floor_area: The floor area of the property.
+        :return: The matching rows of ``photo_supply_lookup``. This can be more than one row.
+        :raises ValueError: If even the relaxed match finds no rows.
+        """
+        # The dataset is created with lower-cased tenures, so the comparison is made in lower case
+        tenure_key = tenure.lower()
+        # The full "not defined" tenure text is swapped for its truncated form before matching
+        full_not_defined = (
+            "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is not to "
+            "be used for an existing dwelling"
+        )
+        if tenure_key == full_not_defined:
+            tenure_key = (
+                "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is no"
+            )
+
+        coarse_mask = (
+            (photo_supply_lookup["tenure"] == tenure_key) &
+            (photo_supply_lookup["built_form"] == built_form) &
+            (photo_supply_lookup["property_type"] == property_type)
+        )
+        exact_mask = (
+            coarse_mask &
+            (photo_supply_lookup["construction_age_band"] == construction_age_band) &
+            (photo_supply_lookup["is_flat"] == is_flat) &
+            (photo_supply_lookup["is_pitched"] == is_pitched) &
+            (photo_supply_lookup["is_roof_room"] == is_roof_room)
+        )
+        candidates = photo_supply_lookup[exact_mask]
+
+        if candidates.empty:
+            # A full match is occasionally missing, so fall back to a more aggregated set of rows
+            candidates = photo_supply_lookup[coarse_mask]
+            if construction_age_band in candidates["construction_age_band"].values:
+                candidates = candidates[candidates["construction_age_band"] == construction_age_band]
+
+            if candidates.empty:
+                raise ValueError("No photo supply matches")
+
+        decile = cls.classify_floor_area(
+            floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values
+        )
+
+        # Only narrow by decile when doing so leaves at least one row
+        if decile in candidates["floor_area_decile"].values:
+            candidates = candidates[candidates["floor_area_decile"] == decile]
+
+        return candidates
--- a/etl/solar/app.py
+++ b/etl/solar/app.py
@ -0,0 +1,31 @@
+from pathlib import Path
+from etl.epc.property_change_app import get_cleaned
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    Build the solar photo supply lookup from the locally stored EPC data and save it.
+
+    The EPC "photo-supply" variable is defined as:
+    "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply
+    is not present in the property."
+
+    When recommending solar, the retrofit is simulated by increasing this value from 0, so a sensible figure to
+    increase it to is needed. This script gathers the data from which such a figure can be deduced.
+
+    :return: None
+    """
+    certificate_directories = [path for path in DATA_DIRECTORY.iterdir() if path.is_dir()]
+
+    builder = SolarPhotoSupply(file_directories=certificate_directories, cleaned_lookup=get_cleaned())
+    builder.create_dataset()
+    builder.save()
--- a/etl/solar/tests/test_solar_photo_supply.py
+++ b/etl/solar/tests/test_solar_photo_supply.py
@ -0,0 +1,109 @@
+import unittest
+import pandas as pd
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
+
+class TestSolarPhotoSupply(unittest.TestCase):
+    """Tests for the lookup filtering and the floor area classification of SolarPhotoSupply."""
+
+    def setUp(self):
+        # Two-row stand-in for the stored lookup: a leasehold detached house and a freehold semi-detached flat
+        self.photo_supply_lookup = pd.DataFrame({
+            "tenure": ["leasehold", "freehold"],
+            "built_form": ["detached", "semi-detached"],
+            "property_type": ["house", "flat"],
+            "construction_age_band": ["pre-1900", "1900-1929"],
+            "is_flat": [False, True],
+            "is_pitched": [True, False],
+            "is_roof_room": [False, True],
+            "floor_area_decile": [0, 1],
+            "photo_supply": [100, 200]
+        })
+
+        # Stand-in thresholds: areas up to 50 are decile 0, areas up to 100 are decile 1
+        self.floor_area_decile_thresholds = pd.DataFrame({
+            "floor_area_decile_thresholds": [50, 100]
+        })
+
+        self.solar_photo_supply = SolarPhotoSupply([], {})
+
+    def _filter(self, **property_details):
+        """Run filter_photo_supply_lookup against the mock lookup and thresholds."""
+        return self.solar_photo_supply.filter_photo_supply_lookup(
+            self.photo_supply_lookup,
+            self.floor_area_decile_thresholds,
+            **property_details
+        )
+
+    def test_correct_filtering(self):
+        matched = self._filter(
+            tenure="leasehold",
+            built_form="detached",
+            property_type="house",
+            construction_age_band="pre-1900",
+            is_flat=False,
+            is_pitched=True,
+            is_roof_room=False,
+            floor_area=45
+        )
+        self.assertEqual(len(matched), 1)
+        self.assertEqual(matched.iloc[0]["photo_supply"], 100)
+
+    def test_no_matches(self):
+        # No row has this built form, so even the relaxed match is empty
+        with self.assertRaises(ValueError):
+            self._filter(
+                tenure="leasehold",
+                built_form="unknown",
+                property_type="house",
+                construction_age_band="pre-1900",
+                is_flat=False,
+                is_pitched=True,
+                is_roof_room=False,
+                floor_area=45
+            )
+
+    def test_floor_area_decile_matching(self):
+        matched = self._filter(
+            tenure="freehold",
+            built_form="semi-detached",
+            property_type="flat",
+            construction_age_band="1900-1929",
+            is_flat=True,
+            is_pitched=False,
+            is_roof_room=True,
+            floor_area=60
+        )
+        self.assertEqual(len(matched), 1)
+        self.assertEqual(matched.iloc[0]["photo_supply"], 200)
+
+    def test_invalid_parameters(self):
+        # A non-string tenure has no .lower(), which surfaces as an AttributeError
+        with self.assertRaises(AttributeError):
+            self._filter(
+                tenure=123,
+                built_form="detached",
+                property_type="house",
+                construction_age_band="pre-1900",
+                is_flat=False,
+                is_pitched=True,
+                is_roof_room=False,
+                floor_area=45
+            )
+
+    def test_classify_floor_area(self):
+        thresholds = [10, 20, 30, 40, 50]
+        solar_photo_supply = SolarPhotoSupply([], {})
+
+        # (floor area, expected decile, failure message)
+        cases = [
+            (25, 2, "Decile classification did not match expected result"),
+            (60, len(thresholds), "Decile classification for out of range value is incorrect"),
+        ]
+        for floor_area, expected_decile, failure_message in cases:
+            result = solar_photo_supply.classify_floor_area(floor_area, thresholds)
+            self.assertEqual(result, expected_decile, failure_message)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/etl/testing_data/birmingham_pilot.py
+++ b/etl/testing_data/birmingham_pilot.py
@ -0,0 +1,179 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import os
+
+import numpy as np
+import pandas as pd
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+FILE_SIZE = 5
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
+USER_ID = 8
+PORTFOLIO_ID = 54
+
+
+def app():
+    """
+    Searches the EPC API for example social-rented properties in Birmingham, printing the details of each
+    candidate, and then uploads a hard-coded csv of four test addresses to S3
+    """
+    # For this dataset, we want 3 properties, all houses. A mid-terrace, an end-terrace and a semi-detached
+    # NOTE(review): four examples are built below (three houses and one flat), so the count above is out of date
+
+    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+
+    # Birmingham has a Local Authority Code of E08000025
+
+    # ~~~~~~~~~~~~~~~~~~~~
+    # First example
+    # ~~~~~~~~~~~~~~~~~~~~
+    # Let's take an EPC D property
+    # NOTE(review): unlike the later searches, no "energy-band" filter is passed here, so the D rating is not
+    # enforced by the query
+    example_1_reponse = epc_client.domestic.search(
+        params={
+            "local-authority": "E08000025",
+            "property-type": "house",
+        },
+        size=1000
+    )
+    example_1_reponse = example_1_reponse["rows"]
+    # Get a property with a cavity wall
+    example_1_reponse_filtered = [
+        x for x in example_1_reponse if
+        "cavity wall, as built, no insulation (assumed)" in x["walls-description"].lower()
+    ]
+    # With an uninsulated pitched roof
+    example_1_reponse_filtered = [
+        x for x in example_1_reponse_filtered if "pitched, no insulation (assumed)" in x["roof-description"].lower()
+    ]
+    # Get a social housing property
+    example_1_reponse_filtered = [
+        x for x in example_1_reponse_filtered if x["tenure"] == "Rented (social)"
+    ]
+
+    # The trailing comments record the values that were printed when the script was written
+    # NOTE(review): indexing with [0] raises an IndexError if no property matches the filters
+    print(example_1_reponse_filtered[0]["postcode"])
+    # B13 9LT
+    print(example_1_reponse_filtered[0]["address1"])
+    # 113 Tenby Road
+    print(example_1_reponse_filtered[0]["built-form"])
+    # Mid-Terrace
+    print(example_1_reponse_filtered[0]["current-energy-rating"])
+    # 'D'
+
+    # ~~~~~~~~~~~~~~~~~~~~
+    # Second example
+    # ~~~~~~~~~~~~~~~~~~~~
+
+    # Let's take an EPC E property
+    example_2_reponse = epc_client.domestic.search(
+        params={
+            "local-authority": "E08000025",
+            "property-type": "house",
+            "energy-band": "e"
+        },
+        size=1000
+    )
+    example_2_reponse = example_2_reponse["rows"]
+    # Get a solid wall example
+    example_2_reponse_filtered = [
+        x for x in example_2_reponse if
+        "solid brick, as built, no insulation (assumed)" in x["walls-description"].lower()
+    ]
+    # With some existing loft insulation
+    example_2_reponse_filtered = [
+        x for x in example_2_reponse_filtered if "pitched, 100 mm loft insulation" in x["roof-description"].lower()
+    ]
+    # Get a social housing property
+    example_2_reponse_filtered = [
+        x for x in example_2_reponse_filtered if x["tenure"] == "Rented (social)"
+    ]
+
+    print(example_2_reponse_filtered[0]["postcode"])
+    # B28 8JF
+    print(example_2_reponse_filtered[0]["address1"])
+    # 139 School Road
+    print(example_2_reponse_filtered[0]["built-form"])
+    # Semi-Detached
+    print(example_2_reponse_filtered[0]["current-energy-rating"])
+    # E
+
+    # ~~~~~~~~~~~~~~~~~~~~
+    # Third example
+    # ~~~~~~~~~~~~~~~~~~~~
+    # Let's take an EPC F property
+    example_3_reponse = epc_client.domestic.search(
+        params={
+            "local-authority": "E08000025",
+            "property-type": "house",
+            "energy-band": "f"
+        },
+        size=1000
+    )
+    example_3_reponse = example_3_reponse["rows"]
+    # Get a social housing property
+    example_3_reponse_filtered = [
+        x for x in example_3_reponse if x["tenure"] == "Rented (social)"
+    ]
+
+    # The fifth match (index 4) is the one used for this example
+    print(example_3_reponse_filtered[4]["walls-description"])
+    print(example_3_reponse_filtered[4]["floor-description"])
+    print(example_3_reponse_filtered[4]["roof-description"])
+    print(example_3_reponse_filtered[4]["postcode"])
+    # B32 1SL
+    print(example_3_reponse_filtered[4]["address1"])
+    # 77 Simmons Drive
+    print(example_3_reponse_filtered[4]["built-form"])
+    # Semi-Detached
+
+    # ~~~~~~~~~~~~~~~~~~~~
+    # Final example
+    # ~~~~~~~~~~~~~~~~~~~~
+    # Let's take a flat that is a D
+    example_4_reponse = epc_client.domestic.search(
+        params={
+            "local-authority": "E08000025",
+            "property-type": "flat",
+            "energy-band": "d"
+        },
+        size=1000
+    )
+    example_4_reponse = example_4_reponse["rows"]
+
+    # Get a property with a cavity wall
+    example_4_reponse_filtered = [
+        x for x in example_4_reponse if
+        "cavity wall, as built, no insulation (assumed)" in x["walls-description"].lower()
+    ]
+    # Get a social housing property
+    example_4_reponse_filtered = [
+        x for x in example_4_reponse_filtered if x["tenure"] == "Rented (social)"
+    ]
+    print(example_4_reponse_filtered[0]["postcode"])
+    # B32 1LS
+    print(example_4_reponse_filtered[0]["address1"])
+    # Flat 2
+
+    print(example_4_reponse_filtered[0]["floor-description"])
+    print(example_4_reponse_filtered[0]["property-type"])
+    # Flat
+
+    # The uploaded file is hard-coded from the values recorded above rather than built from the search results
+    test_file = pd.DataFrame(
+        [
+            # New properties
+            {"address": "113 Tenby Road", "postcode": "B13 9LT", "Notes": None},
+            {"address": "139 School Road", "postcode": "B28 8JF", "Notes": None},
+            {"address": "77 Simmons Drive", "postcode": "B32 1SL", "Notes": None},
+            {"address": "Flat 2, 54 Wedgewood Road", "postcode": "B32 1LS", "Notes": None},
+        ]
+    )
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
+    save_csv_to_s3(
+        dataframe=test_file,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Print a body dict that points at the uploaded file
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename
+    }
+    print(body)
--- a/etl/testing_data/estimate_epc.py
+++ b/etl/testing_data/estimate_epc.py
@ -0,0 +1,194 @@
+from pathlib import Path
+from random import choices, sample
+
+import os
+import pandas as pd
+from tqdm import tqdm
+from dotenv import load_dotenv
+from utils.logger import setup_logger
+from backend.SearchEpc import SearchEpc, vartypes
+from BaseUtility import Definitions
+from etl.epc.settings import BUILT_FORM_REMAP
+
+ENV_FILE = Path(__file__).parent / "backend" / ".env"
+
+logger = setup_logger()
+
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+DIR_SAMPLE_SIZE = 500
+N_DIRECTORIES = 50
+
+# Load the .env file before reading the token, otherwise a token defined only in the .env file is never picked up
+load_dotenv(ENV_FILE)
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+CATETORICALS_TO_IGNORE = [
+    "postcode", "constituency", "local-authority", "built-form", "property-type", "address1", "constituency-label",
+    "building-reference-number", "address2", "posttown", "transaction-type", "lmk-key", "address3",
+    "local-authority-label", "county",
+]
+
+
+def check_numeric_performance(estimated_value, actual_value):
+    """
+    Relative error of an estimate against the actual value: None when there is no actual value to compare
+    against, 1 when the estimate is missing, otherwise abs(estimated - actual) / abs(actual)
+    """
+    if pd.isnull(actual_value):
+        return None
+    if pd.isnull(estimated_value):
+        return 1
+    if actual_value == 0:
+        # Relative error is undefined against zero: score an exact match as 0 and anything else as 1
+        return 0 if estimated_value == 0 else 1
+
+    # Divide by the magnitude so that a negative actual value cannot produce a negative error
+    return abs(estimated_value - actual_value) / abs(actual_value)
+
+
+def app():
+    """
+    Tests the EPC estimation process by re-estimating sampled EPCs and scoring them against the real records.
+    """
+
+    numerical_vartypes = {key: value for key, value in vartypes.items() if value in ["float", "Int64"]}
+    str_var_types = {key: value for key, value in vartypes.items() if value == "str"}
+    # Make sure we haven't missed any keys
+    if len(numerical_vartypes) + len(str_var_types) != len(vartypes):
+        raise ValueError("Not all vartypes have been accounted for")
+
+    # Drop some keys that aren't important
+    for k in CATETORICALS_TO_IGNORE:
+        str_var_types.pop(k, None)
+
+    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
+    # Sample without replacement so that the same directory is never scored twice
+    directory_sample = sample(directories, k=min(N_DIRECTORIES, len(directories)))
+
+    results = []
+
+    for directory in tqdm(directory_sample):
+        filepath = directory / "certificates.csv"
+        df = pd.read_csv(filepath, low_memory=False)
+        # Drop missing UPRNs before the string cast, otherwise they become the non-null string "<NA>"
+        df = df[~pd.isnull(df["UPRN"])].copy()
+        df["UPRN"] = df["UPRN"].astype("Int64").astype("str")
+        # uprn_sample = sample(df["UPRN"].unique().tolist(), DIR_SAMPLE_SIZE)
+        # Take a fixed sample based on the first DIR_SAMPLE_SIZE uprns
+        uprn_sample = sorted(df["UPRN"].unique().tolist())[:DIR_SAMPLE_SIZE]
+        df_sample = df[df["UPRN"].isin(uprn_sample)]
+        # Take the record with the newest LODGEMENT_DATETIME by uprn
+        df_sample = df_sample.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+        # Convert the columns to lower case and replace underscores with hyphens, the same as the api
+        df_sample.columns = df_sample.columns.str.lower().str.replace("_", "-")
+
+        # For each epc, we test the estimation process
+        for _, epc in df_sample.iterrows():
+            epc = epc.to_dict()
+            address1 = epc["address1"]
+            postcode = epc["postcode"]
+
+            # Get all EPCs for this uprn and we make sure they get dropped from the estimate_epc function
+            epcs_for_uprn = df[df["UPRN"] == epc["uprn"]]
+            lmks_to_drop = epcs_for_uprn["LMK_KEY"].tolist()
+            searcher = SearchEpc(address1, postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="")
+            searcher.uprn = epc["uprn"]
+
+            # Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
+            # Enclosed End-Terrace
+            built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
+            if (epc["property-type"] == "Maisonette" and built_form == "Detached") or (
+                built_form in Definitions.DATA_ANOMALY_MATCHES
+            ):
+                built_form = ""
+
+            estimated_epc = searcher.estimate_epc(
+                property_type=epc["property-type"], built_form=built_form, lmks_to_drop=lmks_to_drop
+            )
+
+            # We now compare the difference between the estimated and original
+            # TODO: We can convert windows and lighting to numeric versions and estimate how close we are
+            numeric_performance = {
+                key: check_numeric_performance(estimated_epc[key], epc[key])
+                for key in numerical_vartypes
+            }
+
+            # Remove Nones
+            numeric_performance = {key: value for key, value in numeric_performance.items() if value is not None}
+            # Get an average
+            numeric_performance = sum(numeric_performance.values()) / len(numeric_performance)
+            numeric_success = 1 - numeric_performance
+
+            # categorical performance
+            categorical_performance = {
+                key: 0 if estimated_epc[key] != epc[key] else 1 for key in str_var_types
+            }
+            # Get an average
+            categorical_success = sum(categorical_performance.values()) / len(categorical_performance)
+
+            results.append(
+                {
+                    "uprn": epc["uprn"],
+                    "numeric_success": numeric_success,
+                    "categorical_success": categorical_success,
+                    "property_type": epc["property-type"],
+                    "built_form": epc["built-form"],
+                    "tenure": epc["tenure"],
+                }
+            )
+
+    # Get aggregate performance figures
+    results_df = pd.DataFrame(results)
+    results_df["tenure"] = results_df["tenure"].replace("Rented (social)", "rental (social)")
+
+    avg_numeric_succes = results_df["numeric_success"].median()
+    avg_categorical_sucess = results_df["categorical_success"].median()
+
+    # With 20 nearest homes
+    # 0.7718100840549558
+    # 0.5116279069767442
+    # 100 nearest homes
+    # 0.7859617377809409
+    # 0.5348837209302325
+
+    # Fixed sample, sqrt weights
+
+    # Group by tenure
+    by_tenure = results_df.groupby("tenure").agg(
+        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
+    )
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+
+    # With 20 nearest homes
+    #                                                     numeric_success  categorical_success   uprn
+    # tenure
+    # NO DATA!                                                   0.847840             0.581395    278
+    # Not defined - use in the case of a new dwelling...         0.930282             0.651163    617
+    # Owner-occupied                                             0.770330             0.511628   2588
+    # Rented (private)                                           0.791885             0.558140   1232
+    # owner-occupied                                             0.741088             0.488372  10912
+    # rental (private)                                           0.749064             0.488372   3252
+    # rental (social)                                            0.822109             0.581395   3878
+    # unknown                                                    0.895840             0.627907   1820
+
+    # 100 nearest homes
+    # tenure
+    # NO DATA!                                                   0.899566             0.604651    233
+    # Not defined - use in the case of a new dwelling...         0.927518             0.674419    608
+    # Owner-occupied                                             0.777026             0.511628   3167
+    # Rented (private)                                           0.805646             0.534884   1316
+    # owner-occupied                                             0.762180             0.488372  10835
+    # rental (private)                                           0.760503             0.511628   3181
+    # rental (social)                                            0.830057             0.604651   3705
+    # unknown                                                    0.899948             0.627907   1571
+
+    # By property type - we also want to see how many properties we have for each property type
+    by_property_type = results_df.groupby("property_type").agg(
+        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
+    )
+    # By property_type & built form
+    by_property_type_built_form = results_df.groupby(["property_type", "built_form"]).agg(
+        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
+    )
--- a/etl/testing_data/livewest_pilot.py
+++ b/etl/testing_data/livewest_pilot.py
@ -0,0 +1,38 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import os
+
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
+USER_ID = 8
+PORTFOLIO_ID = 61
+
+
+def app():
+    """
+    Uploads the two LiveWest pilot addresses to S3 as an input csv and prints a body dict that points at the
+    uploaded file
+    """
+    pilot_file = pd.DataFrame(
+        [
+            {"address": "42, Foxes Field", "postcode": "TR18 3RJ", "Notes": None},
+            {"address": "11, Cranley Gardens", "postcode": "TQ13 8UT", "Notes": None},
+        ]
+    )
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/livewest_pilot_file.csv"
+    save_csv_to_s3(
+        dataframe=pilot_file,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename
+    }
+    print(body)
--- a/etl/testing_data/no_epc_input.py
+++ b/etl/testing_data/no_epc_input.py
@ -0,0 +1,42 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 57
+
+
+def app():
+    """
+    This portfolio is presumably for testing properties that have no EPC, going by the no_epc.csv file name
+    (the previous description was copied from the windows portfolio script) - TODO confirm
+    :return:
+    """
+
+    test_file = pd.DataFrame(
+        [
+            {"address": "21 Butler House", "postcode": "E2 0PN", "Notes": None},
+            {"address": "22 Butler House", "postcode": "E2 0PN", "Notes": None},
+            {"address": "23 Butler House", "postcode": "E2 0PN", "Notes": None},
+            {"address": "24 Butler House", "postcode": "E2 0PN", "Notes": None},
+        ]
+    )
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/no_epc.csv"
+    save_csv_to_s3(
+        dataframe=test_file,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename
+    }
+    print(body)
--- a/etl/testing_data/retrofitted_properties.py
+++ b/etl/testing_data/retrofitted_properties.py
@ -0,0 +1,61 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 62
+
+
+def app():
+    """
+    This portfolio contains properties that we have demo'd in pilots, or properties that were provided to us
+    as properties that are being treated under funding schemes and we have pre/post EPRs for
+    :return:
+    """
+
+    test_file = pd.DataFrame(
+        [
+            # Live West Properties
+            {"address": "42, Foxes Field", "postcode": "TR18 3RJ", "Notes": None},
+            {"address": "11, Cranley Gardens", "postcode": "TQ13 8UT", "Notes": None},
+            # Keyzy properties
+            {'address': '2 South Terrace', 'postcode': 'NN1 5JY', 'Notes': ''},
+            {'address': '25 Albert Street', 'postcode': 'PO12 4TY', 'Notes': ''},
+            # Pilot properties
+            {'address': '113 Tenby Road', 'postcode': 'B13 9LT', 'Notes': ''},
+            {'address': '139 School Road', 'postcode': 'B28 8JF', 'Notes': ''},
+            {'address': '77 Simmons Drive', 'postcode': 'B32 1SL', 'Notes': ''},
+            {'address': 'Flat 2, 54 Wedgewood Road', 'postcode': 'B32 1LS', 'Notes': ''},
+            # Warmfront ECO4 Properties
+            {'address': '73 Long Chaulden', 'postcode': 'HP1 2HX', 'Notes': ''},
+            {'address': '8 Lindlings', 'postcode': 'HP1 2HA', 'Notes': ''},
+            {'address': '44 Lindlings', 'postcode': 'HP1 2HE', 'Notes': ''},
+            {'address': '46 Chaulden Terrace', 'postcode': 'HP1 2AN', 'Notes': ''},
+            # Osmosis SHDF Properties
+            {'address': '4, Heather Shaw', 'postcode': 'BA14 7JS', 'Notes': ''},
+            {'address': '16 Glastonbury Road', 'postcode': 'M32 9PE', 'Notes': ''},
+            {'address': '31 Loddon Way', 'postcode': 'BA15 1HG', 'Notes': ''},
+            {'address': '62 Pearmain Drive', 'postcode': 'NG3 3DJ', 'Notes': ''},
+        ]
+
+    )
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/eco4_shdf_retrofits.csv"
+    save_csv_to_s3(
+        dataframe=test_file,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename
+    }
+    print(body)
--- a/etl/testing_data/sap_model_simulation.py
+++ b/etl/testing_data/sap_model_simulation.py
--- a/etl/testing_data/the_guiness_partnership_pilot.py
+++ b/etl/testing_data/the_guiness_partnership_pilot.py
@ -0,0 +1,38 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import os
+
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
+USER_ID = 8
+PORTFOLIO_ID = 59
+
+
+def app():
+    """
+    Uploads the two Guinness Partnership pilot addresses to S3 as an input csv and prints a body dict that
+    points at the uploaded file
+    """
+    pilot_file = pd.DataFrame(
+        [
+            {"address": "10 Elm Close", "postcode": "CV37 8XL", "Notes": None},
+            {"address": "21, Spring Lane", "postcode": "MK17 0QP", "Notes": None},
+        ]
+    )
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/the_guiness_partnership_pilot_file.csv"
+    save_csv_to_s3(
+        dataframe=pilot_file,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename
+    }
+    print(body)
--- a/etl/testing_data/windows_portfolio.py
+++ b/etl/testing_data/windows_portfolio.py
@ -0,0 +1,43 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 56
+
+
+def app():
+    """
+    This portfolio is for testing windows recommendations. It uploads five addresses to S3 as an input csv and
+    prints a body dict that points at the uploaded file
+    :return:
+    """
+
+    test_file = pd.DataFrame(
+        [
+            {"address": "3 Church Terrace", "postcode": "LE13 0PW", "Notes": None},
+            {"address": "3, Main Street, Redmile", "postcode": "NG13 0GA", "Notes": None},
+            {"address": "Manor House, Kennel Lane, Reepham", "postcode": "LN3 4DZ", "Notes": None},
+            {"address": "13 Main Street", "postcode": "LE14 2JU", "Notes": None},
+            {"address": "8 The Crescent, Coston Road, Buckminster", "postcode": "NG33 5SF", "Notes": None},
+        ]
+    )
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/windows_portfolio_inputs.csv"
+    save_csv_to_s3(
+        dataframe=test_file,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename
+    }
+    print(body)
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@ -181,4 +181,16 @@ module "lambda_carbon_prediction_ecr" {
 module "lambda_heat_prediction_ecr" {
  ecr_name = "lambda-heat-prediction-${var.stage}"
  source   = "./modules/ecr"
+}
+
+##############################################
+# CDN - Cloudfront
+##############################################
+module "cloudfront_distribution" {
+  source             = "./modules/cloudfront"
+  bucket_name        = module.s3.bucket_name
+  bucket_id          = module.s3.bucket_id
+  bucket_arn         = module.s3.bucket_arn
+  bucket_domain_name = module.s3.bucket_domain_name
+  stage              = var.stage
 }
--- a/infrastructure/terraform/modules/cloudfront/main.tf
+++ b/infrastructure/terraform/modules/cloudfront/main.tf
@ -0,0 +1,65 @@
+# CloudFront distribution that serves the S3 bucket passed in through the bucket_* variables
+resource "aws_cloudfront_distribution" "s3_distribution" {
+  origin {
+    domain_name = var.bucket_domain_name
+    origin_id   = "S3-${var.bucket_name}"
+
+    # CloudFront reads from the bucket using the origin access identity declared in this module
+    s3_origin_config {
+      origin_access_identity = aws_cloudfront_origin_access_identity.oai.cloudfront_access_identity_path
+    }
+  }
+
+  enabled = true
+
+  default_cache_behavior {
+    # Read-only distribution: only GET and HEAD requests are accepted and cached
+    allowed_methods        = ["GET", "HEAD"]
+    cached_methods         = ["GET", "HEAD"]
+    # Must match the origin_id of the origin block above
+    target_origin_id       = "S3-${var.bucket_name}"
+    viewer_protocol_policy = "redirect-to-https"
+    compress               = true
+
+    # Neither query strings nor cookies are forwarded to the origin
+    forwarded_values {
+      query_string = false
+      cookies {
+        forward = "none"
+      }
+    }
+
+    # TTLs are in seconds: the default is 1 day and the maximum is 365 days
+    min_ttl     = 0
+    default_ttl = 86400
+    max_ttl     = 31536000
+  }
+
+  price_class = "PriceClass_All"
+
+  # No geographic restrictions on who can request content
+  restrictions {
+    geo_restriction {
+      restriction_type = "none"
+    }
+  }
+
+  # Uses the default CloudFront certificate rather than a custom one
+  viewer_certificate {
+    cloudfront_default_certificate = true
+  }
+}
+
+# Origin access identity that the distribution uses for its S3 origin and that the bucket policy grants access to
+resource "aws_cloudfront_origin_access_identity" "oai" {
+  comment = "OAI for ${var.bucket_name}"
+}
+
+# Allows the origin access identity to read (s3:GetObject) every object in the bucket
+# NOTE(review): this resource manages the policy of var.bucket_id - confirm nothing else defines a policy for
+# the same bucket
+resource "aws_s3_bucket_policy" "bucket_policy" {
+  bucket = var.bucket_id
+
+  policy = jsonencode({
+    Version   = "2012-10-17"
+    Statement = [
+      {
+        Effect    = "Allow"
+        Principal = {
+          AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
+        }
+        Action   = "s3:GetObject"
+        Resource = "${var.bucket_arn}/*"
+      },
+    ]
+  })
+}
--- a/infrastructure/terraform/modules/cloudfront/variables.tf
+++ b/infrastructure/terraform/modules/cloudfront/variables.tf
@ -0,0 +1,24 @@
+variable "bucket_name" {
+  description = "The name of the bucket"
+  type        = string
+}
+
+# NOTE(review): no resource in this module's main.tf references var.stage - confirm whether it is still needed
+variable "stage" {
+  description = "The deployment stage"
+  type        = string
+}
+
+variable "bucket_id" {
+  description = "The ID of the S3 bucket"
+  type        = string
+}
+
+variable "bucket_arn" {
+  description = "The ARN of the S3 bucket"
+  type        = string
+}
+
+variable "bucket_domain_name" {
+  description = "The regional domain name of the S3 bucket"
+  type        = string
+}
--- a/infrastructure/terraform/modules/s3/outputs.tf
+++ b/infrastructure/terraform/modules/s3/outputs.tf
@ -2,3 +2,15 @@ output "bucket_name" {
  description = "The name of the S3 bucket"
  value       = aws_s3_bucket.bucket.bucket
 }
+
+output "bucket_id" {
+  description = "The ID of the S3 bucket"
+  value       = aws_s3_bucket.bucket.id
+}
+
+output "bucket_arn" {
+  description = "The ARN of the S3 bucket"
+  value       = aws_s3_bucket.bucket.arn
+}
+
+output "bucket_domain_name" {
+  description = "The regional domain name of the S3 bucket"
+  value       = aws_s3_bucket.bucket.bucket_regional_domain_name
+}
--- a/keyzy_pilot.csv
+++ b/keyzy_pilot.csv
@ -0,0 +1,3 @@
+address,postcode,Notes,,,,
+2 South Terrace,NN1 5JY,,,,,
+25 Albert Street,PO12 4TY,,,,,
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@ -1,27 +1,96 @@
 import numpy as np
+from recommendations.county_to_region import county_to_region_map

-# This data comes from SPONs
+# This data comes from SPONs 2023
 regional_labour_variations = [
-    {"Region": "Outer London (Spon’s 2023)", "Adjustment_Factor": 1.00},
+    {"Region": "Outer London", "Adjustment_Factor": 1.00},
    {"Region": "Inner London", "Adjustment_Factor": 1.05},
-    {"Region": "South East", "Adjustment_Factor": 0.96},
-    {"Region": "South West", "Adjustment_Factor": 0.90},
+    {"Region": "South East England", "Adjustment_Factor": 0.96},
+    {"Region": "South West England", "Adjustment_Factor": 0.90},
    {"Region": "East of England", "Adjustment_Factor": 0.93},
    {"Region": "East Midlands", "Adjustment_Factor": 0.88},
    {"Region": "West Midlands", "Adjustment_Factor": 0.87},
-    {"Region": "North East", "Adjustment_Factor": 0.83},
-    {"Region": "North West", "Adjustment_Factor": 0.88},
-    {"Region": "Yorkshire and Humberside", "Adjustment_Factor": 0.86},
+    {"Region": "North East England", "Adjustment_Factor": 0.83},
+    {"Region": "North West England", "Adjustment_Factor": 0.88},
+    {"Region": "Yorkshire and the Humber", "Adjustment_Factor": 0.86},
    {"Region": "Wales", "Adjustment_Factor": 0.88},
    {"Region": "Scotland", "Adjustment_Factor": 0.88},
    {"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
 ]

-county_map = {
-    "Northamptonshire": "East Midlands",
-    "Hampshire": "South East",
+# This data is based on the MCS database
+MCS_SOLAR_PV_COST_DATA = {
+    "last_updated": "2024-01-04",
+    "average_cost_per_kwh": 2013.94,
+    "average_cost_per_kwh-Outer London": 2618.75,
+    "average_cost_per_kwh-Inner London": 2618.75,
+    "average_cost_per_kwh-South East England": 2083.33,
+    "average_cost_per_kwh-South West England": 2113,
+    "average_cost_per_kwh-East of England": 1973.86,
+    "average_cost_per_kwh-East Midlands": 1981.86,
+    "average_cost_per_kwh-West Midlands": 1926.55,
+    "average_cost_per_kwh-North East England": 2028.49,
+    "average_cost_per_kwh-North West England": 1620.42,
+    "average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
+    "average_cost_per_kwh-Wales": 1898.83,
+    "average_cost_per_kwh-Scotland": 1967.97,
+    "average_cost_per_kwh-Northern Ireland": 2126.09,
 }

+# This is based on quotes from installers
+BATTERY_COST = 3500
+
+# This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
+SMART_APPLIANCE_THERMOSTAT_COST = 400
+PROGRAMMER_COST = 120
+ROOM_THERMOSTAT_COST = 150
+TRVS_COST = 35
+
+# Cost for TTZC
+# Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
+# Based on the Nest system
+TTZC_SMART_THERMOSTAT_COST = 205
+TTZC_SMART_THERMOSTAT_LABOUR_HOURS = 2
+TTZC_ELECTRICIAN_HOURLY_RATE = 45
+# Based on cost of a Nest temperature sensor
+TTZC_ROOM_TEMPERATURE_SENSOR_COST = 50
+TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17  # (Assume ~ 10 mins install per sensor)
+# Based on an average cost of smart radiator valves
+TTZC_SMART_RADIATOR_VALUES = 50
+TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37  # (Assume ~ 15-30 mins install per valve)
+
+# Low carbon combi boiler - median value based on £2200 - £3000 range
+LOW_CARBON_COMBI_BOILER = 2200
+
+# boiler prices based on
+# https://www.greenmatch.co.uk/boilers/30kw-boiler
+# https://www.greenmatch.co.uk/boilers/35kw-boiler
+# https://www.greenmatch.co.uk/boilers/40kw-boiler
+# These are exclusive of installation costs
+COMBI_BOILER_COSTS = {
+    "30kw": 1550,
+    "35kw": 1610,
+    "40kw": 1625
+}
+
+CONVENTIONAL_BOILER_COSTS = {
+    "30kw": 1117,
+    "35kw": 1546,
+    "40kw": 1776
+}
+
+# Assumes 3 hours to remove each heater (including re-decorating)
+ROOM_HEATER_REMOVAL_COST = 120
+ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
+
+# This is a cost quoted by Jim for a system flush - existing system will run more efficiently
+SYSTEM_FLUSH_COST = 250
+
+SINGLE_RADIATOR_COST = 150
+DOUBLE_RADIATOR_COST = 300
+FLUE_COST = 600
+PIPEWORK_COST = 750  # Min cost is £500
+

 class Costs:
    """
@ -40,8 +109,16 @@ class Costs:
    # We assume a conservative 10% contingency for all works which is a rate defined by SPONs
    CONTINGENCY = 0.1

+    # For flat roof, we assume it's a high risk project as it's very weather dependent and also is heavily
+    # dependent on the quality of the existing roof
+    FLAT_ROOF_CONTINGENCY = 0.15
+
+    # We use a higher contingency rate for internal wall insulation because of the potential for issues with moving
+    # fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
+    IWI_CONTINGENCY = 0.2
+
    # Where there is more uncertainty, a higher contingency rate is used
-    HIGH_RISK_CONTINGENCY = 0.15
+    HIGH_RISK_CONTINGENCY = 0.2
    # When there is less uncertainty, a lower contingency rate is used
    LOW_RISK_CONTINGENCY = 0.05

@ -54,11 +131,21 @@ class Costs:
    # have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate.
    # For External wall insulation (EWI), we use 15% as the preliminaries rate if we think the property might
    # need scaffolding, otherwise we use 12%. This is to account for any site preparation that might be required
-    EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.12
-    EWI_SCAFFOLDING_PRELIMINARIES = 0.15
+    EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.2
+    EWI_SCAFFOLDING_PRELIMINARIES = 0.25

    VAT_RATE = 0.2
-    PROFIT_MARGIN = 0.15
+    PROFIT_MARGIN = 0.2
+
+    # Based on this greenmatch article, on average, a Sash window is around 50% more expensive than a casement window.
+    # Therefore, for a conservative cost estimate, and allowance for a more premium window type, we inflate the material
+    # cost of the windows to allow for a sash window type
+    # https://www.greenmatch.co.uk/windows/double-glazing/cost
+    SASH_WINDOW_INFLATION_FACTOR = 1.5
+
+    # Typically, secondary glazing can be installed for 25% of the cost of double glazed windows - to be conservative,
+    # we scale the cost by half
+    SECONDARY_GLAZING_SCALING_FACTOR = 0.5

    def __init__(self, property_instance):
        """
@ -71,13 +158,16 @@ class Costs:
        self.property = property_instance
        self.regional_labour_variations = regional_labour_variations

-        self.county = county_map.get(self.property.data["county"], None)
-        if self.county is None:
-            raise ValueError("County not found in county map")
+        self.region = county_to_region_map.get(self.property.data["county"], None)
+        if self.region is None:
+            # Try and grab using the local-authority-label
+            self.region = county_to_region_map.get(self.property.data["local-authority-label"], None)
+            if self.region is None:
+                raise ValueError("Region not found in county map")

        self.labour_adjustment_factor = [
            x["Adjustment_Factor"] for x in self.regional_labour_variations if
-            x["Region"] == self.county
+            x["Region"] == self.region
        ][0]

        if not self.labour_adjustment_factor:
@ -115,6 +205,9 @@ class Costs:

        labour_hours = material["labour_hours_per_unit"] * wall_area

+        # Assume a team of 2
+        labour_days = (labour_hours / 8) / 2
+
        return {
            "total": total_cost,
            "subtotal": subtotal_before_vat,
@ -124,7 +217,8 @@ class Costs:
            "material": base_material_cost,
            "profit": profit_cost,
            "labour_hours": labour_hours,
-            "labour_cost": labour_cost
+            "labour_cost": labour_cost,
+            "labour_days": labour_days
        }

    def loft_insulation(self, floor_area, material):
@ -136,12 +230,16 @@ class Costs:
        """
        material_cost_per_m2 = material["material_cost"]

+        # We inflate material costs due to recent price increases
+        material_cost_per_m2 = material_cost_per_m2 * 1.5
+
        base_material_cost = material_cost_per_m2 * floor_area
        labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor

        subtotal_before_profit = base_material_cost + labour_cost

-        contingency_cost = subtotal_before_profit * self.CONTINGENCY
+        # We use high risk contingency because of the possibility of access issues and clearing existing insulation
+        contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
        preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
        profit_cost = subtotal_before_profit * self.PROFIT_MARGIN

@ -153,6 +251,9 @@ class Costs:

        labour_hours = material["labour_hours_per_unit"] * floor_area

+        # Assume a team of 1 person
+        labour_days = labour_hours / 8
+
        return {
            "total": total_cost,
            "subtotal": subtotal_before_vat,
@ -162,7 +263,8 @@ class Costs:
            "material": base_material_cost,
            "profit": profit_cost,
            "labour_hours": labour_hours,
-            "labour_cost": labour_cost
+            "labour_cost": labour_cost,
+            "labour_days": labour_days
        }

    def internal_wall_insulation(self, wall_area, material, non_insulation_materials):
@ -224,8 +326,7 @@ class Costs:

        subtotal_before_profit = labour_costs + materials_costs + demolition_plant_costs

-        # We use high risk contingency for iwi
-        contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
+        contingency_cost = subtotal_before_profit * self.IWI_CONTINGENCY
        preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
        profit_cost = subtotal_before_profit * self.PROFIT_MARGIN

@ -301,7 +402,9 @@ class Costs:

        subtotal_before_profit = labour_costs + materials_costs

-        contingency_cost = subtotal_before_profit * self.CONTINGENCY
+        # Because of the possibility of damage to the existing floor, or difficulties associated with moving
+        # fittings, we use a higher contingency rate
+        contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
        preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
        profit_cost = subtotal_before_profit * self.PROFIT_MARGIN

@ -569,3 +672,566 @@ class Costs:
            "labour_days": labour_days,
            "labour_cost": labour_costs
        }
+
+    def low_energy_lighting(self, number_of_lights, number_current_lel_lights, material):
+        """
+        Estimates the cost of low energy lighting, built up from material and labour costs with contingency,
+        preliminaries, profit and VAT applied on top.
+
+        :param number_of_lights: Int, number of lights being costed
+        :param number_current_lel_lights: Int, number of low energy lights currently installed in the home
+        :param material: Dict, material data containing "material_cost", "labour_cost" and
+            "labour_hours_per_unit", each expressed per light
+        :return: Dict, the cost breakdown together with the estimated labour hours and days
+        """
+
+        # A home with no low energy lights at all may hide wiring blockers, so it gets the high risk rate
+        contingency_rate = (
+            self.HIGH_RISK_CONTINGENCY if number_current_lel_lights == 0 else self.CONTINGENCY
+        )
+
+        fittings_cost = material["material_cost"] * number_of_lights
+        install_cost = material["labour_cost"] * number_of_lights * self.labour_adjustment_factor
+        base_cost = fittings_cost + install_cost
+
+        # Contingency, preliminaries and profit are each a percentage of the same base cost
+        contingency_cost = base_cost * contingency_rate
+        preliminaries_cost = base_cost * self.PRELIMINARIES
+        profit_cost = base_cost * self.PROFIT_MARGIN
+
+        net_cost = base_cost + contingency_cost + preliminaries_cost + profit_cost
+        vat_cost = net_cost * self.VAT_RATE
+
+        # A single electrician working 8 hour days
+        hours = material["labour_hours_per_unit"] * number_of_lights
+        days = (hours / 8)
+
+        return {
+            "total": net_cost + vat_cost,
+            "subtotal": net_cost,
+            "vat": vat_cost,
+            "contingency": contingency_cost,
+            "preliminaries": preliminaries_cost,
+            "material": fittings_cost,
+            "profit": profit_cost,
+            "labour_hours": hours,
+            "labour_days": days,
+            "labour_cost": install_cost
+        }
+
+    def flat_roof_insulation(self, floor_area, material, non_insulation_materials):
+        """
+        Estimates the cost of insulating a flat roof using a warm roof construction. A model of a warm, flat
+        roof construction can be seen in this video:
+        https://www.youtube.com/watch?v=WZ6Ng6YI9OA
+        Warm, flat roof insulation will normally be 100-125mm in depth
+
+        The measure is broken down into the following jobs to be done
+        1) Preparation of the roof. Clean the existing roof surface, remove any debris and repair any damage.
+        An edge barrier will likely also need to be installed to protect the sides of the roof from water
+        ingress.
+        2) Primer Application. A layer of primer is applied to the clean roof surface to enhance the adhesion
+        of subsequent layers, and to seal the existing roof surface.
+        3) Vapour Proof Layer Installation. A vapour control layer is laid to prevent moisture ingress from
+        inside the building, which is essential in warm roof construction.
+        4) Insulation Layer Application. Insulation boards, such as rigid PIR (Polyisocyanurate) boards, are
+        placed and securely fixed over the roof.
+        5) Waterproofing Membrane Installation. The insulation (and timber layer, if used) is covered with a
+        waterproofing membrane, like EPDM, PVC, or bituminous felt. All joints, edges and roof penetrations are
+        carefully sealed to ensure water tightness.
+
+        :param floor_area: Area of the flat roof to be insulated, based on the area of the floor
+        :param material: Selected insulation material
+        :param non_insulation_materials: Non-insulation materials required for the job
+        :return: Dict, the cost breakdown together with the estimated labour hours and days
+        :raises ValueError: If the non-insulation materials do not contain exactly two per-m2 preparation
+            entries, one vapour barrier entry and one waterproofing entry
+        """
+
+        def entries_of_type(type_name):
+            return [entry for entry in non_insulation_materials if entry["type"] == type_name]
+
+        preparation_entries = [
+            entry for entry in entries_of_type("flat_roof_preparation") if entry["cost_unit"] == "gbp_per_m2"
+        ]
+        vapour_barrier_entries = entries_of_type("flat_roof_vapour_barrier")
+        waterproofing_entries = entries_of_type("flat_roof_waterproofing")
+
+        entry_counts = (len(preparation_entries), len(vapour_barrier_entries), len(waterproofing_entries))
+        if entry_counts != (2, 1, 1):
+            raise ValueError("Incorrect number of data entries for non-insulation materials")
+
+        vapour_barrier = vapour_barrier_entries[0]
+        waterproofing = waterproofing_entries[0]
+
+        # Material costs, all of which scale with the roof area
+        preparation_materials = sum(entry["material_cost"] * floor_area for entry in preparation_entries)
+        vapour_barrier_materials = vapour_barrier["material_cost"] * floor_area
+        insulation_materials = material["material_cost"] * floor_area
+
+        # Labour costs; the regional adjustment is applied to the combined figure
+        preparation_labour = sum(entry["labour_cost"] * floor_area for entry in preparation_entries)
+        vapour_barrier_labour = vapour_barrier["labour_cost"] * floor_area
+
+        # Waterproofing and upstand are only available as a total cost, so they sit outside of both the
+        # material and labour figures
+        waterproofing_cost = waterproofing["total_cost"] * floor_area
+
+        labour_total = (preparation_labour + vapour_barrier_labour) * self.labour_adjustment_factor
+        materials_total = preparation_materials + vapour_barrier_materials + insulation_materials
+
+        base_cost = labour_total + materials_total + waterproofing_cost
+
+        contingency_cost = base_cost * self.FLAT_ROOF_CONTINGENCY
+        preliminaries_cost = base_cost * self.PRELIMINARIES
+        profit_cost = base_cost * self.PROFIT_MARGIN
+
+        net_cost = base_cost + contingency_cost + preliminaries_cost + profit_cost
+        vat_cost = net_cost * self.VAT_RATE
+
+        preparation_hours = sum(entry["labour_hours_per_unit"] * floor_area for entry in preparation_entries)
+        vapour_barrier_hours = vapour_barrier["labour_hours_per_unit"] * floor_area
+        waterproofing_hours = waterproofing["labour_hours_per_unit"] * floor_area
+        hours = preparation_hours + vapour_barrier_hours + waterproofing_hours
+
+        # A small/medium project might be conducted by a team of 2-4. A team of 2 is assumed since a lot of
+        # the roofs will be on the smaller side; this is to be reviewed later
+        days = (hours / 8) / 2
+
+        return {
+            "total": net_cost + vat_cost,
+            "subtotal": net_cost,
+            "vat": vat_cost,
+            "contingency": contingency_cost,
+            "preliminaries": preliminaries_cost,
+            "material": materials_total,
+            "profit": profit_cost,
+            "labour_hours": hours,
+            "labour_days": days,
+            "labour_cost": labour_total
+        }
+
+    def window_glazing(self, number_of_windows, material, is_secondary_glazing=False):
+        """
+        Estimates the cost of glazing a number of windows. The jobs to be done for window glazing are
+        characterised as follows:
+
+        1) Initial Assessment and Measurements: The condition of the window frame and opening is assessed
+        before the existing window is removed, and precise measurements are taken so that the new double glazed
+        windows fit perfectly.
+
+        2) Remove the Existing Window: The old single glazed window is carefully dismantled and removed,
+        avoiding damage to the surrounding wall and to the window frame (if it's to be reused).
+
+        3) Dispose of the Existing Window: The old window is disposed of responsibly, with glass and other
+        materials recycled where possible.
+
+        4) Surface Preparation: The opening may need preparation where there is damage or where adjustments are
+        needed to accommodate the new window, such as repairing or replacing parts of the frame, sealing gaps
+        and ensuring the opening is level and square.
+
+        5) Install the Window Frame (if new frames are used): Double glazed windows often come with their own
+        frames, which are aligned, levelled and fixed securely into the opening.
+
+        6) Install the Window Sill: If a new sill is required, it is aligned with the frame and securely
+        attached at this stage.
+
+        7) Install the Double Glazed Glass Units: The glass units are inserted into the frame with a snug fit
+        that does not stress the glass, which could lead to cracking or breaking.
+
+        8) Sealing and Weatherproofing: The frame, and the join between glass and frame, are sealed so that
+        there are no drafts and the installation is weather-tight, typically with silicone sealant or another
+        appropriate sealing material.
+
+        9) Finishing Touches: Cosmetic work such as trimming, painting or staining the frame and sill to match
+        the rest of the property, plus cleaning up any mess created during the installation.
+
+        10) Inspection and Testing: The new windows are checked to ensure they open, close and lock correctly,
+        and the sealing is checked for gaps or other issues.
+
+        For this cost estimation process, the initial assessment is factored into the preliminaries
+
+        :param number_of_windows: Number of windows to be glazed
+        :param material: Dict, material data containing "material_cost", "labour_cost" and
+            "labour_hours_per_unit", each expressed per window
+        :param is_secondary_glazing: Bool, when True the secondary glazing scaling factor is applied in place of
+            the sash window inflation factor
+        :return: Dict, the cost breakdown together with the estimated labour hours and days
+        """
+
+        glazing_cost = material["material_cost"] * number_of_windows
+        fitting_cost = (
+            material["labour_cost"] * number_of_windows * self.labour_adjustment_factor
+        )
+
+        if is_secondary_glazing:
+            scaling = self.SECONDARY_GLAZING_SCALING_FACTOR
+        else:
+            scaling = self.SASH_WINDOW_INFLATION_FACTOR
+
+        # NOTE(review): the scaling is applied to the combined material and labour figure, while the
+        # "material" and "labour_cost" entries returned below are the unscaled values - confirm this is intended
+        base_cost = (glazing_cost + fitting_cost) * scaling
+
+        contingency_cost = base_cost * self.CONTINGENCY
+        preliminaries_cost = base_cost * self.PRELIMINARIES
+        profit_cost = base_cost * self.PROFIT_MARGIN
+
+        net_cost = base_cost + contingency_cost + preliminaries_cost + profit_cost
+        vat_cost = net_cost * self.VAT_RATE
+
+        hours = material["labour_hours_per_unit"] * number_of_windows
+        if is_secondary_glazing:
+            hours = hours * self.SECONDARY_GLAZING_SCALING_FACTOR
+
+        # Assume a team of 2 working 8 hour days
+        days = (hours / 8) / 2
+
+        return {
+            "total": net_cost + vat_cost,
+            "subtotal": net_cost,
+            "vat": vat_cost,
+            "contingency": contingency_cost,
+            "preliminaries": preliminaries_cost,
+            "material": glazing_cost,
+            "profit": profit_cost,
+            "labour_hours": hours,
+            "labour_cost": fitting_cost,
+            "labour_days": days
+        }
+
+    def solar_pv(self, wattage: float, has_battery: bool = False):
+        """
+        Estimates the total cost of a solar PV installation from data provided by the MCS dashboard, which
+        contains costing data for installations of renewable and clean energy measures.
+
+        The dashboard data is filtered on domestic building installations and then collected manually across
+        the various regions, as there is currently no automated way to get the data out of the MCS dashboard.
+
+        Price can also be benchmarked against this checkatrade article:
+        https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/
+
+        :param wattage: Peak wattage of the solar PV system
+        :param has_battery: Bool, whether the system includes a battery
+        :return: Dict, total, subtotal and VAT along with the estimated labour hours and days
+        """
+
+        # The regional rate is looked up under a key made of the metric name and this property's region
+        rate_key = "average_cost_per_kwh" + "-" + self.region
+        regional_rate = MCS_SOLAR_PV_COST_DATA[rate_key]
+
+        # The rate is multiplied by the system size in kW, so the wattage is converted first
+        gross_cost = (wattage / 1000) * regional_rate
+
+        if has_battery:
+            # The battery cost is based on the £3500 quote, received from installers
+            gross_cost += BATTERY_COST
+
+        # The gross figure is treated as VAT inclusive, so the VAT element is backed out of it
+        net_cost = gross_cost / (1 + self.VAT_RATE)
+        vat_element = gross_cost - net_cost
+
+        # Labour is based on estimates from online research: an average team seems to consist of 3 people
+        # and most jobs take around 2 days. The hours figure below is the original estimate, carried over
+        # unchanged
+        return {
+            "total": gross_cost,
+            "subtotal": net_cost,
+            "vat": vat_element,
+            "labour_hours": 72,
+            "labour_days": 2,
+        }
+
+    def programmer_and_appliance_thermostat(self, has_programmer):
+        """
+        Estimates the total cost of installing a programmer and appliance thermostat. When the property
+        already has a programmer, only the appliance thermostat needs to be costed.
+
+        :param has_programmer: Bool, whether the property already has a programmer
+        :return: Dict, total, subtotal and VAT along with the estimated labour hours and days
+        """
+
+        # The appliance thermostat is always required; its cost is estimated at £400, which is the upper end
+        # of the range
+        gross_cost = SMART_APPLIANCE_THERMOSTAT_COST
+        hours = 2
+
+        if not has_programmer:
+            # A programmer has to be supplied and fitted as well
+            gross_cost = SMART_APPLIANCE_THERMOSTAT_COST + PROGRAMMER_COST
+            hours = 4
+
+        # The gross figure is treated as VAT inclusive, so the VAT element is backed out of it
+        net_cost = gross_cost / (1 + self.VAT_RATE)
+        vat_element = gross_cost - net_cost
+
+        return {
+            "total": gross_cost,
+            "subtotal": net_cost,
+            "vat": vat_element,
+            "labour_hours": hours,
+            "labour_days": 1,
+        }
+
+    def electric_room_heaters(self, number_heated_rooms):
+        """
+        Estimates the cost of electric room heaters from a cost per room, as estimated by the following
+        article:
+        https://www.bestelectricradiators.co.uk/blog/cost-to-install-a-new-heating-system-uk/
+
+        :param number_heated_rooms: int, number of rooms to be heated
+        :return: Dict, total, subtotal and VAT along with the estimated labour hours and days
+        """
+
+        # £500 per heated room, treated as VAT inclusive so the VAT element is backed out of it
+        gross_cost = 500 * number_heated_rooms
+        net_cost = gross_cost / (1 + self.VAT_RATE)
+
+        # TODO: Rough estimate to be reviewed
+        hours = 1 * number_heated_rooms
+
+        return {
+            "total": gross_cost,
+            "subtotal": net_cost,
+            "vat": gross_cost - net_cost,
+            "labour_hours": hours,
+            # Part days are rounded up to whole 8 hour days
+            "labour_days": np.ceil(hours / 8),
+        }
+
+    def high_heat_electric_storage_heaters(self, number_heated_rooms):
+        """
+        Estimates the cost of high heat retention electric storage heaters from a cost per room, as estimated
+        by the energy saving trust
+        https://energysavingtrust.org.uk/advice/electric-heating/
+
+        :param number_heated_rooms: int, number of rooms to be heated
+        :return: Dict, total, subtotal and VAT along with the estimated labour hours and days
+        """
+
+        # £1500 per heated room, treated as VAT inclusive so the VAT element is backed out of it
+        gross_cost = 1500 * number_heated_rooms
+        net_cost = gross_cost / (1 + self.VAT_RATE)
+
+        # TODO: Rough estimate to be reviewed
+        hours = 3 * number_heated_rooms
+
+        return {
+            "total": gross_cost,
+            "subtotal": net_cost,
+            "vat": gross_cost - net_cost,
+            "labour_hours": hours,
+            # Part days are rounded up to whole 8 hour days
+            "labour_days": np.ceil(hours / 8),
+        }
+
+    def celect_type_controls(self):
+        """
+        Estimates the cost of installing Celect type controls
+
+        :return: Dict, total, subtotal and VAT along with the estimated labour hours and days
+        """
+
+        # The £50 cost is a rough estimate based on internet research. It is treated as VAT inclusive, so the
+        # VAT element is backed out of it
+        gross_cost = 50
+        net_cost = gross_cost / (1 + self.VAT_RATE)
+
+        # The labour is estimated at 4 hours, within a single day
+        return {
+            "total": gross_cost,
+            "subtotal": net_cost,
+            "vat": gross_cost - net_cost,
+            "labour_hours": 4,
+            "labour_days": 1,
+        }
+
+    def hot_water_tank_insulation(self):
+        """
+        Estimates the cost of installing hot water tank insulation
+
+        :return: Dict, total, subtotal and VAT; no labour hours or days are attributed to this measure
+        """
+
+        # The £50 cost is a rough estimate based on internet research. It is treated as VAT inclusive, so the
+        # VAT element is backed out of it
+        gross_cost = 50
+        net_cost = gross_cost / (1 + self.VAT_RATE)
+
+        return {
+            "total": gross_cost,
+            "subtotal": net_cost,
+            "vat": gross_cost - net_cost,
+            "labour_hours": 0,
+            "labour_days": 0,
+        }
+
+    def roomstat_programmer_trvs(
+        self, number_heated_rooms, has_programmer, has_trvs, has_room_thermostat
+    ):
+        """
+        Estimates the cost of bringing a property up to a room thermostat, programmer and TRVs set of heating
+        controls. Only the controls the property does not already have are costed.
+
+        :param number_heated_rooms: Number of heated rooms; the TRV cost and labour scale with this
+        :param has_programmer: Bool, whether the property already has a programmer
+        :param has_trvs: Bool, whether the property already has TRVs
+        :param has_room_thermostat: Bool, whether the property already has a room thermostat
+        :return: Dict, total, subtotal and VAT along with the estimated labour hours and days
+        """
+
+        gross_cost = 0
+        hours = 0
+
+        if not has_programmer:
+            gross_cost += PROGRAMMER_COST
+            hours += 1
+
+        if not has_trvs:
+            # Both the cost and the labour for TRVs are per heated room
+            gross_cost += TRVS_COST * number_heated_rooms
+            hours += 0.25 * number_heated_rooms
+
+        if not has_room_thermostat:
+            gross_cost += ROOM_THERMOSTAT_COST
+            hours += 0.5
+
+        # The gross figure is treated as VAT inclusive, so the VAT element is backed out of it
+        net_cost = gross_cost / (1 + self.VAT_RATE)
+
+        # The work is always reported as a single day, regardless of the number of hours
+        return {
+            "total": gross_cost,
+            "subtotal": net_cost,
+            "vat": gross_cost - net_cost,
+            "labour_hours": hours,
+            "labour_days": 1,
+        }
+
+    def time_and_temperature_zone_control(self, number_heated_rooms):
+        """
+        Estimates the cost of installing time and temperature zone control, made up of a smart thermostat plus
+        a room temperature sensor and smart radiator valves for each heated room.
+
+        :param number_heated_rooms: Number of heated rooms; sensors and radiator valves scale with this
+        :return: Dict, total, subtotal and VAT along with the estimated labour hours and days
+        """
+
+        # The product costs are inclusive of VAT
+        sensor_costs = TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms
+        valve_costs = TTZC_SMART_RADIATOR_VALUES * number_heated_rooms
+        product_costs = TTZC_SMART_THERMOSTAT_COST + sensor_costs + valve_costs
+
+        sensor_hours = TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms
+        valve_hours = TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms
+        hours = TTZC_SMART_THERMOSTAT_LABOUR_HOURS + sensor_hours + valve_hours
+
+        # Contingency and preliminaries are added to the labour to account for the complexity of the job
+        uplift = 1 + self.CONTINGENCY + self.PRELIMINARIES
+        labour_costs = (TTZC_ELECTRICIAN_HOURLY_RATE * hours) * uplift
+
+        # VAT is only added on the labour, since the product costs already include it. Note that the
+        # "subtotal" returned therefore contains the VAT inclusive product costs
+        vat_cost = labour_costs * self.VAT_RATE
+        net_cost = product_costs + labour_costs
+
+        return {
+            "total": net_cost + vat_cost,
+            "subtotal": net_cost,
+            "vat": vat_cost,
+            "labour_hours": hours,
+            # Part days are rounded up to whole 8 hour days
+            "labour_days": np.ceil(hours / 8),
+        }
+
+    def heater_removal(self, n_rooms):
+        """
+        Estimates the costs of removal of heaters, including the redecoration costs of the space behind the
+        heater
+
+        :param n_rooms: Number of rooms; the removal cost and labour hours are both per room
+        :return: Dict, total, subtotal and VAT along with the estimated labour hours and days
+        """
+
+        net_cost = ROOM_HEATER_REMOVAL_COST * n_rooms
+        hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_rooms
+
+        # The removal cost has VAT added on top of it
+        vat_cost = net_cost * self.VAT_RATE
+
+        return {
+            "total": net_cost + vat_cost,
+            "subtotal": net_cost,
+            "vat": vat_cost,
+            "labour_hours": hours,
+            # Part days are rounded up to whole 8 hour days
+            "labour_days": np.ceil(hours / 8),
+        }
+
+    @staticmethod
+    def _estimate_n_radiators(number_habitable_rooms, total_floor_area, property_type, built_form):
+        """
+        Estimates the number of radiators a property needs. Two estimates are made - one from the room count
+        and one from the heating power required for the floor area - and the higher of the two is used.
+
+        :param number_habitable_rooms: Number of habitable rooms; one radiator is assumed for each
+        :param total_floor_area: Total floor area, used to estimate the heating power required
+        :param property_type: One of 'Flat', 'House', 'Bungalow' or 'Maisonette'
+        :param built_form: Built form of the property. 'Mid-Terrace', 'Semi-Detached', 'Detached' and
+            'End-Terrace' have their own scaling factor; any other value is left unscaled
+        :return: int, the estimated number of radiators
+        :raises Exception: If the property type is not one of the supported values
+        """
+        # Base number of radiators: one per habitable room
+        base_radiators = number_habitable_rooms
+
+        # Additional radiators for non-habitable essential areas (e.g., kitchens, hallways)
+        additional_radiators = 3  # Initial assumption
+
+        # Adjust additional radiators based on property type
+        if property_type == 'Flat':
+            additional_radiators -= 1  # Flats may need fewer radiators due to less exposure
+        elif property_type in ['House', 'Bungalow', 'Maisonette']:
+            # Multiple floors in Maisonette may require additional heating points
+            additional_radiators += 2  # Houses and bungalows might need more due to greater exposure
+        else:
+            raise Exception("Invalid property type")
+
+        # Adjust total radiator needs based on built form
+        form_factor = {
+            'Mid-Terrace': 0.95,
+            'Semi-Detached': 1.05,
+            'Detached': 1.25,
+            'End-Terrace': 1.05
+        }
+
+        # Built forms outside of the table above (for example the enclosed terrace variants, or missing data)
+        # previously raised a KeyError; they now fall back to a neutral factor so an estimate is still produced
+        built_form_factor = form_factor.get(built_form, 1.0)
+
+        # Calculate total heating power needed and number of radiators based on standard output
+        total_heating_power_required = total_floor_area * 80  # Watts per square meter
+        radiator_output = 1000  # Average wattage per radiator
+        total_radiators_based_on_power = (total_heating_power_required / radiator_output) * built_form_factor
+
+        # Final estimation taking the higher of calculated needs or base room count
+        estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
+        return round(estimated_radiators)
+
+    def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
+        """
+        Estimates the cost of installing a boiler, based on a basic estimate of median value £2600 to install
+        a low carbon combi boiler. First time central heating costs can also be found here:
+        https://www.checkatrade.com/blog/cost-guides/central-heating-installation-cost/
+
+        :param is_combi: Bool, selects the combi boiler costs when True, otherwise the conventional boiler costs
+        :param size: Key used to look up the unit cost of the boiler
+        :param exising_room_heaters: Bool, when True the cost of removing the existing room heaters is added
+        :param system_change: Bool, when True the cost of radiators, flue, pipework and an extra day of labour
+            is added
+        :param n_heated_rooms: Number of heated rooms, used for the heater removal costing
+        :param n_rooms: Number of rooms, used as the habitable room count when estimating radiators
+        :return: Dict, total, subtotal and VAT along with the estimated labour hours and days
+        """
+
+        # The unit cost is the cost without VAT
+        unit_costs = COMBI_BOILER_COSTS if is_combi else CONVENTIONAL_BOILER_COSTS
+        unit_cost = unit_costs[size]
+
+        # Average cost of installation is 1 (maybe 2) days at £300 per day
+        # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/
+        # To be pessimistic, assume 2 days work
+        day_rate = 300
+        labour_days = 2
+        labour_hours = labour_days * 8
+
+        install_labour = day_rate * self.labour_adjustment_factor * labour_days
+        # Add contingency and preliminaries
+        install_labour = install_labour * (1 + self.CONTINGENCY + self.PRELIMINARIES)
+
+        # NOTE(review): VAT is only calculated on the labour here, although the unit cost is described as
+        # excluding VAT - confirm whether the unit cost should attract VAT as well
+        vat = install_labour * self.VAT_RATE
+        subtotal_before_vat = unit_cost + install_labour
+        total_cost = subtotal_before_vat + vat
+
+        if exising_room_heaters:
+            # The existing room heaters have to be removed, so that costing is folded into the running totals
+            removal = self.heater_removal(n_rooms=n_heated_rooms)
+            total_cost += removal["total"]
+            subtotal_before_vat += removal["subtotal"]
+            labour_hours += removal["labour_hours"]
+            labour_days += removal["labour_days"]
+            vat += removal["vat"]
+
+        if system_change:
+            # A system change needs radiators, a flue and pipework, plus an extra day of labour
+            radiator_count = self._estimate_n_radiators(
+                number_habitable_rooms=n_rooms,
+                total_floor_area=self.property.floor_area,
+                property_type=self.property.data["property-type"],
+                built_form=self.property.data["built-form"]
+            )
+
+            extra_day_labour = day_rate * self.labour_adjustment_factor
+            radiators = DOUBLE_RADIATOR_COST * radiator_count
+
+            # The system change figure is treated as VAT inclusive, so the VAT element is backed out of it
+            change_gross = radiators + FLUE_COST + PIPEWORK_COST + extra_day_labour
+            change_net = change_gross / (1 + self.VAT_RATE)
+            change_vat = change_gross - change_net
+
+            labour_days += 1
+            labour_hours += 8
+            total_cost += change_gross
+            subtotal_before_vat += change_net
+            vat += change_vat
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": labour_days,
+        }
--- a/recommendations/FireplaceRecommendations.py
+++ b/recommendations/FireplaceRecommendations.py
@ -20,7 +20,7 @@ class FireplaceRecommendations(Definitions):
        self.has_ventilaion = None
        self.recommendation = None

-    def recommend(self):
+    def recommend(self, phase=0):
        """
        Based on the number of open fireplcaes found, we recommend sealing each one at a cost of
        around £500
@ -32,19 +32,23 @@ class FireplaceRecommendations(Definitions):
        if number_open_fireplaces == 0:
            return

-        estimated_cost = number_open_fireplaces * self.COST_OF_WORK
+        already_installed = "sealing_open_fireplace" in self.property.already_installed
+        estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not already_installed else 0

        # We recommend installing two mechanical ventilation systems
        self.recommendation = [
            {
+                "phase": phase,
                "parts": [],
                "type": "sealing_open_fireplace",
                "description": "Seal %s open fireplaces" % str(number_open_fireplaces),
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
+                "already_installed": already_installed,
                "total": estimated_cost,
                # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
-                "labour_hours": 6 * number_open_fireplaces
+                "labour_hours": 6 * number_open_fireplaces,
+                "labour_days": 6 * number_open_fireplaces / 8,  # Assume 8 hour day
            }
        ]
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@ -8,9 +8,8 @@ from datatypes.enums import QuantityUnits
 from backend.Property import Property
 from recommendations.recommendation_utils import (
    r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
-    get_recommended_part, get_floor_u_value
+    get_recommended_part, get_floor_u_value, override_costs
 )
-from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
 from recommendations.Costs import Costs


@ -51,8 +50,9 @@ class FloorRecommendations(Definitions):
            ]
        ]

+        # For solid floor, we don't use materials that are too thick
        self.solid_floor_insulation_materials = [
-            part for part in materials if part["type"] == "solid_floor_insulation"
+            part for part in materials if part["type"] == "solid_floor_insulation" if float(part["depth"]) <= 75
        ]

        self.solid_floor_non_insulation_materials = [
@ -69,15 +69,9 @@ class FloorRecommendations(Definitions):
        # TODO: To be completed
        self.exposed_floor_non_insulation_materials = []

-    def recommend(self):
+    def recommend(self, phase=0):
        u_value = self.property.floor["thermal_transmittance"]
-
-        floor_level = (
-            FLOOR_LEVEL_MAP[self.property.data["floor-level"]] if
-            self.property.data["floor-level"] not in self.DATA_ANOMALY_MATCHES else None
-        )
        property_type = self.property.data["property-type"]
-
        floor_area = self.property.insulation_floor_area
        year_built = self.property.year_built

@ -89,7 +83,13 @@ class FloorRecommendations(Definitions):
            return

        # If the property is a flat that isn't at ground level, it's likely impractical to recommend a floor upgrade
-        if (floor_level != 0) and (property_type == "Flat"):
+        if (self.property.floor_level != 0) and (property_type == "Flat") and (
+            self.property.floor["another_property_below"]
+        ):
+            return
+
+        # If the property is a new build flat, we won't recommend floor upgrades
+        if len(self.property.full_sap_epc) and (property_type == "Flat"):
            return

        if u_value:
@ -103,15 +103,17 @@ class FloorRecommendations(Definitions):
                # The floor is already compliant
                return

-        u_value = get_floor_u_value(
-            floor_type=self.property.floor_type,
-            area=floor_area,
-            perimeter=self.property.perimeter,
-            age_band=self.property.age_band,
-            insulation_thickness=self.property.floor["insulation_thickness"],
-            wall_type=self.property.wall_type
-        )
-        self.estimated_u_value = u_value
+        if u_value is None:
+            u_value = get_floor_u_value(
+                floor_type=self.property.floor_type,
+                area=floor_area,
+                perimeter=self.property.perimeter,
+                age_band=self.property.age_band,
+                insulation_thickness=self.property.floor["insulation_thickness"],
+                wall_type=self.property.wall_type
+            )
+
+            self.estimated_u_value = u_value

        if u_value < self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
            return
@ -119,6 +121,7 @@ class FloorRecommendations(Definitions):
        if self.property.floor["is_suspended"]:
            # Given the U-value, we recommend underfloor insulation
            self.recommend_floor_insulation(
+                phase=phase,
                u_value=u_value,
                insulation_materials=self.suspended_floor_insulation_materials,
                non_insulation_materials=self.suspended_floor_non_insulation_materials
@ -130,7 +133,8 @@ class FloorRecommendations(Definitions):
            self.recommend_floor_insulation(
                u_value=u_value,
                insulation_materials=self.solid_floor_insulation_materials,
-                non_insulation_materials=self.solid_floor_non_insulation_materials
+                non_insulation_materials=self.solid_floor_non_insulation_materials,
+                phase=phase
            )
            return

@ -142,9 +146,22 @@ class FloorRecommendations(Definitions):

    @staticmethod
    def _make_floor_description(material):
+        """
+        Build the human readable description for a floor insulation recommendation
+        :param material: The insulation part; read via its "type", "depth", "depth_unit" and "description" keys
+        :return: "Install <depth><depth_unit> <description> insulation ..." with a suffix that depends on the type
+        :raises ValueError: If the material type is not suspended, solid or exposed floor insulation
+        """
-        return f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation"

-    def recommend_floor_insulation(self, u_value, insulation_materials, non_insulation_materials):
+        # The depth is truncated to a whole number for display in every branch below
+        if material["type"] == "suspended_floor_insulation":
+            return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation in "
+                    f"suspended floor")
+
+        if material["type"] == "solid_floor_insulation":
+            return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation on "
+                    f"solid floor")
+
+        if material["type"] == "exposed_floor_insulation":
+            return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation in "
+                    f"exposed floor")
+
+        # Any other type has no description yet, so we fail loudly rather than emit a misleading one
+        raise ValueError("Invalid material type - implement me!")
+
+    def recommend_floor_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
        """
        This method is tasked with estimating the impact of performing suspended floor insulation
        :return:
@ -175,17 +192,27 @@ class FloorRecommendations(Definitions):
                            material=material.to_dict(),
                            non_insulation_materials=non_insulation_materials
                        )
+
+                        already_installed = "suspended_floor_insulation" in self.property.already_installed
+                        if already_installed:
+                            cost_result = override_costs(cost_result)
+
                    elif material["type"] == "solid_floor_insulation":
                        cost_result = self.costs.solid_floor_insulation(
                            insulation_floor_area=self.property.insulation_floor_area,
                            material=material.to_dict(),
                            non_insulation_materials=non_insulation_materials
                        )
+
+                        already_installed = "solid_floor_insulation" in self.property.already_installed
+                        if already_installed:
+                            cost_result = override_costs(cost_result)
                    else:
                        raise NotImplementedError("Implement me!")

                    self.recommendations.append(
                        {
+                            "phase": phase,
                            "parts": [
                                get_recommended_part(
                                    part=material.to_dict(),
@ -194,11 +221,12 @@ class FloorRecommendations(Definitions):
                                    cost_result=cost_result
                                ),
                            ],
-                            "type": "floor_insulation",
+                            "type": material["type"],
                            "description": self._make_floor_description(material),
                            "starting_u_value": u_value,
                            "new_u_value": new_u_value,
                            "sap_points": None,
+                            "already_installed": already_installed,
                            **cost_result
                        }
                    )
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@ -0,0 +1,248 @@
+from recommendations.Costs import Costs
+from recommendations.recommendation_utils import check_simulation_difference, override_costs
+from backend.Property import Property
+from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
+
+
+class HeatingControlRecommender:
+    """
+    Recommends heating control upgrades for a single property.
+
+    Every call to recommend() clears self.recommendation and then appends zero or more recommendation
+    dictionaries, selected by the description of the main heating system that is passed in.
+    """
+
+    def __init__(self, property_instance: Property):
+        """
+        :param property_instance: The property to produce heating control recommendations for
+        """
+        self.recommendation = []
+
+        self.property = property_instance
+        self.costs = Costs(property_instance)
+
+    def _build_simulation_config(self, target_controls_description):
+        """
+        Process the target heating controls and compare them with the controls currently on the property
+        :param target_controls_description: Description of the heating controls being recommended
+        :return: The output of check_simulation_difference for the target controls vs the current controls
+        """
+        target_config = MainheatControlAttributes(target_controls_description).process()
+        return check_simulation_difference(
+            new_config=target_config, old_config=self.property.main_heating_controls
+        )
+
+    def _append_heating_control_recommendation(self, description, cost_result, simulation_config):
+        """
+        Store a "heating_control" recommendation. When heating controls are flagged as already installed on the
+        property, the costs are passed through override_costs and the description is replaced
+        :param description: The description of the upgrade
+        :param cost_result: The costs of the upgrade
+        :param simulation_config: The simulation changes that the upgrade results in
+        :return:
+        """
+        already_installed = "heating_control" in self.property.already_installed
+        if already_installed:
+            cost_result = override_costs(cost_result)
+            description = "Heating controls have already been upgraded, no further action needed."
+
+        self.recommendation.append(
+            {
+                "type": "heating_control",
+                "parts": [],
+                "description": description,
+                **cost_result,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "already_installed": already_installed,
+                "simulation_config": simulation_config
+            }
+        )
+
+    def recommend(self, heating_description):
+        """
+        Produce the heating control recommendations that suit the given main heating system. This is a basic
+        first iteration: heating systems that are not listed below produce no recommendation
+        :param heating_description: The description of the main heating system to recommend controls for
+        :return:
+        """
+
+        # Always start from a clean slate
+        self.recommendation = []
+
+        if heating_description in ("Room heaters, electric",):
+            self.recommend_room_heaters_electric_controls()
+        elif heating_description in ("Electric storage heaters", "Electric storage heaters, radiators"):
+            self.recommend_high_heat_retention_controls()
+        elif heating_description in ("Boiler and radiators, mains gas",):
+            # A boiler gets two options: room thermostat, programmer & TRVs ...
+            self.recommend_roomstat_programmer_trvs()
+            # ... and time and temperature zone controls
+            self.recommend_time_temperature_zone_controls()
+
+    def recommend_room_heaters_electric_controls(self):
+        """
+        For a home with electric room heaters, recommend programmer and appliance thermostats when the current
+        controls are rated Average or below, or when they are a programmer and room thermostat. Upgrading the
+        controls is the least invasive improvement, the heating system itself can be considered afterwards
+        :return:
+        """
+        rated_average_or_below = self.property.data["mainheatc-energy-eff"] in ("Poor", "Very Poor", "Average")
+        if not rated_average_or_below and (
+            self.property.main_heating_controls["clean_description"] not in ("Programmer and room thermostat",)
+        ):
+            # No other recommendations are implemented right now
+            return
+
+        # A room thermostat usually sits in a hallway and measures the air around it, switching the heating
+        # system on or off from that single reading. An appliance thermostat sits on the heater itself, so
+        # each heater is controlled individually, which gives more granular control and prevents overheating.
+
+        # If a programmer is already in place, only the appliance thermostats need to be costed
+        has_programmer = self.property.main_heating_controls["switch_system"] == "programmer"
+
+        simulation_config = self._build_simulation_config("Programmer and appliance thermostats")
+        # The simulation is told that the upgraded controls end up with a "Good" energy efficiency rating
+        simulation_config["mainheatc_energy_eff_ending"] = "Good"
+
+        cost_result = self.costs.programmer_and_appliance_thermostat(has_programmer=has_programmer)
+        self.recommendation.append(
+            {
+                "description": "upgrade heating controls to Programmer and Appliance or Smart Thermostats",
+                **cost_result,
+                "simulation_config": simulation_config
+            }
+        )
+
+    def recommend_high_heat_retention_controls(self):
+        """
+        Recommend controls for high heat retention storage heaters (Celect-type controls). These are designed to
+        work with electric storage heaters and are more efficient than the standard storage heater controls.
+        The heating system itself can be considered afterwards
+        :return:
+        """
+        simulation_config = self._build_simulation_config("Controls for high heat retention storage heaters")
+        # The simulation is told that the upgraded controls end up with a "Good" energy efficiency rating
+        simulation_config["mainheatc_energy_eff_ending"] = "Good"
+
+        cost_result = self.costs.celect_type_controls()
+        self.recommendation.append(
+            {
+                "description": "upgrade heating controls to High Heat Retention Storage Heater Controls",
+                **cost_result,
+                "simulation_config": simulation_config
+            }
+        )
+
+    def recommend_roomstat_programmer_trvs(self):
+        """
+        For a home with a mains gas boiler and radiators, recommend a room thermostat, programmer and TRVs.
+
+        The upgrade is recommended when at least one of these holds:
+        1) The property is flagged as having no controls
+        2) There is no programmer
+        3) There is no room thermostat
+        4) There are no TRVs
+        :return:
+        """
+        current_controls = self.property.main_heating_controls
+
+        needs_programmer = current_controls["switch_system"] is None
+        needs_room_thermostat = current_controls["thermostatic_control"] is None
+        needs_trvs = current_controls["trvs"] is None
+        flagged_no_control = current_controls["no_control"] is not None
+
+        if not (flagged_no_control or needs_programmer or needs_room_thermostat or needs_trvs):
+            # Every component is already in place
+            return
+
+        simulation_config = self._build_simulation_config("Programmer, room thermostat and TRVS")
+        # Controls that are currently rated below Good are simulated as Good after the upgrade
+        if self.property.data["mainheatc-energy-eff"] in ("Poor", "Very Poor", "Average"):
+            simulation_config["mainheatc_energy_eff_ending"] = "Good"
+
+        # Components that are already present are passed on to the costing
+        cost_result = self.costs.roomstat_programmer_trvs(
+            number_heated_rooms=int(self.property.data["number-heated-rooms"]),
+            has_programmer=not needs_programmer,
+            has_room_thermostat=not needs_room_thermostat,
+            has_trvs=not needs_trvs
+        )
+
+        self._append_heating_control_recommendation(
+            description="upgrade heating controls to Room thermostat, programmer and TRVs",
+            cost_result=cost_result,
+            simulation_config=simulation_config
+        )
+
+    def recommend_time_temperature_zone_controls(self):
+        """
+        For a home with a boiler, recommend time and temperature zone controls. This is a more advanced and more
+        efficient control system than standard boiler controls, but it may cost more and be more involved to use.
+
+        Nothing is recommended when:
+        1) The current controls are already time and temperature zone controls
+        2) The current controls are already rated 'Very Good'
+        :return:
+        """
+        if (
+            (self.property.main_heating_controls["thermostatic_control"] == "time and temperature zone control") or
+            (self.property.data["mainheatc-energy-eff"] in ("Very Good",))
+        ):
+            # No recommendation needed
+            return
+
+        simulation_config = self._build_simulation_config("Time and temperature zone control")
+        # Controls that are currently rated below Very Good are simulated as Very Good after the upgrade
+        if self.property.data["mainheatc-energy-eff"] in ("Poor", "Very Poor", "Average", "Good"):
+            simulation_config["mainheatc_energy_eff_ending"] = "Very Good"
+
+        cost_result = self.costs.time_and_temperature_zone_control(
+            number_heated_rooms=int(self.property.data["number-heated-rooms"])
+        )
+
+        self._append_heating_control_recommendation(
+            description=(
+                "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
+                "temperature zone control)"
+            ),
+            cost_result=cost_result,
+            simulation_config=simulation_config
+        )
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@ -0,0 +1,435 @@
+import pandas as pd
+
+from recommendations.Costs import Costs
+from recommendations.recommendation_utils import check_simulation_difference, override_costs
+from backend.Property import Property
+from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
+from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
+from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
+from recommendations.HeatingControlRecommender import HeatingControlRecommender
+
+
+class HeatingRecommender:
+
+    def __init__(self, property_instance: Property):
+        """
+        :param property_instance: The property to produce heating system recommendations for
+        """
+        # Populated by recommend(), which also resets it on every call
+        self.recommendations = []
+
+        self.property = property_instance
+        self.costs = Costs(property_instance)
+
+    def recommend(self, phase=0):
+        """
+        Produce the heating system recommendations for the property, replacing anything that was previously
+        stored in self.recommendations. This is a basic first iteration, driven by the description of the main
+        heating system and by whether the property has access to mains gas
+        :param phase: The phase that is attached to the recommendations
+        :return:
+        """
+
+        # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
+        #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
+        #       in the Costs class, stored as SYSTEM_FLUSH_COST
+
+        self.recommendations = []
+
+        electric_descriptions = (
+            "Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators"
+        )
+        no_system_description = "No system present, electric heaters assumed"
+
+        heating_description = self.property.main_heating["clean_description"]
+
+        # Electric heating, or no heating at all without mains gas: recommend high heat retention storage heaters
+        has_electric_heating = heating_description in electric_descriptions
+        if has_electric_heating or (
+            heating_description in (no_system_description,) and not self.property.data["mains-gas-flag"]
+        ):
+            self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
+
+        # An existing mains gas boiler with radiators can always be considered for a boiler upgrade
+        has_boiler = heating_description in ("Boiler and radiators, mains gas",)
+
+        # Every other route to a boiler needs access to mains gas: no heating system, electric heating,
+        # mains gas room heaters or portable electric heaters
+        needs_mains_gas_descriptions = electric_descriptions + (
+            no_system_description,
+            "Room heaters, mains gas",
+            "Portable electric heaters assumed for most rooms",
+        )
+        if not has_boiler and not (
+            heating_description in needs_mains_gas_descriptions and self.property.data["mains-gas-flag"]
+        ):
+            return
+
+        # Anything other than an existing boiler means the heating system would need an overhaul, and existing
+        # room heaters are flagged separately for the boiler recommendation
+        self.recommend_boiler_upgrades(
+            phase=phase,
+            system_change=not has_boiler,
+            exising_room_heaters=heating_description in ("Room heaters, electric", "Room heaters, mains gas")
+        )
+
+    @staticmethod
+    def check_simulation_difference(old_config, new_config):
+        """
+        Given two dictionaries that describe a configuration, pick out every key of the new configuration whose
+        value differs from the old configuration. This is used to determine how the configuration should be
+        updated in the simulation.
+
+        A key that only exists in the new configuration is reported as a difference, rather than raising a
+        KeyError. Keys that only exist in the old configuration are not reported.
+
+        NOTE(review): the methods of this class call the check_simulation_difference function imported from
+        recommendations.recommendation_utils rather than this static method - confirm this copy is still needed
+        :param old_config: The configuration that is currently in place
+        :param new_config: The configuration after the upgrade
+        :return: A dictionary of "<key>_ending" to the new value, for each key that has changed
+        """
+
+        # A unique sentinel never compares equal to a real value, so a missing key always counts as a change
+        missing = object()
+
+        return {
+            key + "_ending": new_config[key]
+            for key in new_config
+            if old_config.get(key, missing) != new_config[key]
+        }
+
+    def combine_heating_and_controls(
+        self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
+        system_change
+    ):
+        """
+        Given a recommendation for heating controls, and a recommendation for the heating system, we combine the two
+        into recommendations of type "heating". Only the first heating controls recommendation is used.
+
+        :param controls_recommendations: The heating controls recommendations. Each one is expected to hold a
+        "description", a "simulation_config" and every key that is present in costs
+        :param heating_simulation_config: The simulation configuration for the heating system. When this is empty,
+        no heating system recommendation is produced, unless system_change applies
+        :param costs: The costs of the heating system
+        :param description: The description of the recommendation
+        :param phase: The phase of the recommendation
+        :param heating_controls_only: If True, we will also add a recommendation for heating controls only
+        :param system_change: Indicates if we are recommending a different type of heating system, compared to the
+        current system. If we have a system change and we have a heat control recommendation, we only recommend
+        both heating and controls together
+        :return: The list of combined recommendations
+        """
+
+        # We produce recommendations with & without heating controls
+        heating_controls_switch = [True, False] if controls_recommendations else [False]
+        if not heating_simulation_config:
+            heating_controls_switch = []
+
+        if system_change and len(controls_recommendations):
+            heating_controls_switch = [True]
+
+        # This is keyed on the type of the recommendations built below. It previously looked for
+        # "cavity_wall_insulation", which zeroed the heating costs of any home with cavity wall insulation
+        already_installed = "heating" in self.property.already_installed
+
+        output = []
+        for controls_switch in heating_controls_switch:
+            total_costs = costs.copy()
+            recommendation_simulation_config = heating_simulation_config.copy()
+            recommendation_description = description
+            if controls_switch:
+                controls_recommendation = controls_recommendations[0]
+                # We add the costs of the heating controls, onto each key in the costs dictionary
+                for key in total_costs:
+                    total_costs[key] += controls_recommendation[key]
+
+                # On a clash, the heating controls configuration wins
+                recommendation_simulation_config = {
+                    **recommendation_simulation_config,
+                    **controls_recommendation["simulation_config"]
+                }
+                controls_description = controls_recommendation["description"]
+                # Make the first letter of the description lowercase, slicing so an empty description is safe
+                controls_description = controls_description[:1].lower() + controls_description[1:]
+
+                recommendation_description = f"{description} and {controls_description}"
+
+            if already_installed:
+                total_costs = override_costs(total_costs)
+                recommendation_description = "Heating system has already been upgraded, no further action needed."
+
+            output.append(
+                {
+                    "phase": phase,
+                    "parts": [
+                        # TODO
+                    ],
+                    "type": "heating",
+                    "description": recommendation_description,
+                    "starting_u_value": None,
+                    "new_u_value": None,
+                    "sap_points": None,
+                    "already_installed": already_installed,
+                    **total_costs,
+                    "simulation_config": recommendation_simulation_config
+                }
+            )
+
+        if heating_controls_only and len(controls_recommendations):
+            # Also add on a recommendation for heating controls only
+            heating_control_recommendation = controls_recommendations[0].copy()
+            # Capitalize the first letter of the description, slicing so an empty description is safe
+            controls_description = heating_control_recommendation["description"]
+            heating_control_recommendation["description"] = (
+                controls_description[:1].upper() + controls_description[1:]
+            )
+
+            # NOTE(review): the keys of the controls recommendation are spread last, so its own "type" (when it
+            # has one) replaces "heating", and "already_installed" is only present if the controls recommendation
+            # carries it - confirm this is what the consumers of these recommendations expect
+            output.append(
+                {
+                    "phase": phase,
+                    "parts": [
+                        # TODO
+                    ],
+                    "type": "heating",
+                    "starting_u_value": None,
+                    "new_u_value": None,
+                    "sap_points": None,
+                    **heating_control_recommendation
+                }
+            )
+
+        return output
+
+    def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only):
+        """
+        We will recommend upgrading to a high heat retention storage system, if the current system is not already
+        high heat retention storage. The recommendations are added to self.recommendations
+
+        :param phase: The phase of the recommendation
+        :param system_change: Indicates if we are recommending a different type of heating system, compared to the
+        current system
+        :param heating_controls_only: Indicates if we should include a recommendation for just heating controls
+        :return:
+        """
+
+        # The heating controls we're recommending for are based on the recommended heating system
+        high_heat_retention_contols_desc = "Controls for high heat retention storage heaters"
+
+        # The same case-insensitive check decides both whether the controls need recommending and whether the
+        # whole system is already in place, so the two decisions can never disagree
+        current_controls_desc = self.property.main_heating_controls["clean_description"]
+        has_hhr_controls = (
+            isinstance(current_controls_desc, str) and
+            current_controls_desc.lower() == high_heat_retention_contols_desc.lower()
+        )
+
+        # Electric storage heaters with high heat retention controls: no recommendation needed. We check this
+        # first, so that we don't build a controls recommendation that would only be thrown away
+        if has_hhr_controls and "Electric storage heaters" in self.property.main_heating["clean_description"]:
+            return
+
+        controls_recommender = HeatingControlRecommender(self.property)
+        # We only recommend Celect-type controls if the current heating controls are not Celect-type controls
+        if not has_hhr_controls:
+            controls_recommender.recommend(heating_description="Electric storage heaters, radiators")
+
+        # Set up artefacts, suitable for the simulation and regardless of controls
+        heating_ending_config = MainHeatAttributes("Electric storage heaters, radiators").process()
+        heating_simulation_config = check_simulation_difference(
+            new_config=heating_ending_config, old_config=self.property.main_heating
+        )
+        # This upgrade will only take the heating system to average energy efficiency
+        heating_simulation_config["mainheat_energy_eff_ending"] = "Average"
+
+        # If the property is off-gas and has no heating system in place, the number of heated rooms will actually
+        # be 0, so we fall back to the number of rooms, less one when the property has more than one room
+        # NOTE(review): this compares "number-heated-rooms" with 0 directly, whereas HeatingControlRecommender
+        # casts the same field with int() - confirm the field is numeric by the time it gets here
+        number_heated_rooms = (
+            self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0
+            else (
+                self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else
+                self.property.number_of_rooms
+            )
+        )
+        # Upgrade to electric storage heaters
+        costs = self.costs.high_heat_electric_storage_heaters(
+            number_heated_rooms=number_heated_rooms
+        )
+        description = "Install high heat retention electric storage heaters"
+
+        recommendations = self.combine_heating_and_controls(
+            controls_recommendations=controls_recommender.recommendation,
+            heating_simulation_config=heating_simulation_config,
+            costs=costs,
+            description=description,
+            phase=phase,
+            heating_controls_only=heating_controls_only,
+            system_change=system_change
+        )
+
+        self.recommendations.extend(recommendations)
+
+    @staticmethod
+    def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
+        """
+        Produce a rough estimate of the boiler size that a property needs. The estimate starts from a base size
+        for the property type and is adjusted for the built form, the volume of the property and the number of
+        heated rooms. It is then limited to between 30 and 40 and snapped to the closest available boiler size.
+
+        A property type or built form that isn't in the lookup tables falls back to the largest value in the
+        table, rather than raising a KeyError, so that an unexpected value can only ever oversize the boiler
+        :param property_type: The type of property, e.g. 'Flat' or 'House'
+        :param built_form: The built form of the property, e.g. 'Mid-Terrace' or 'Detached'
+        :param floor_area: The floor area of the property
+        :param floor_height: The floor height of the property
+        :param num_heated_rooms: The number of heated rooms in the property
+        :return: The closest available boiler size to the estimate
+        """
+        # Step 1: Base size estimation based on property type (as a starting point)
+        base_size = {
+            'Flat': 25,
+            'House': 30,
+            'Maisonette': 28,
+            'Bungalow': 27
+        }
+
+        # Step 2: Calculate the volume of the property
+        volume = floor_area * floor_height
+
+        # Step 3: Adjust base size for built form (to account for heat retention)
+        form_adjustment = {
+            'Mid-Terrace': 0,
+            'End-Terrace': 2,
+            'Semi-Detached': 4,
+            'Detached': 6
+        }
+
+        # Step 4: Further adjust for the total volume and number of heated rooms
+        volume_adjustment = (volume / 100)  # Simplified adjustment factor for volume
+        rooms_adjustment = (num_heated_rooms - 5) * 0.5  # Assuming base case of 5 rooms
+
+        # Calculate the estimated boiler size, falling back to the largest known values for anything unrecognised
+        estimated_size = (
+            base_size.get(property_type, max(base_size.values())) +
+            form_adjustment.get(built_form, max(form_adjustment.values())) +
+            volume_adjustment +
+            rooms_adjustment
+        )
+
+        # Step 5: Align with available boiler sizes, keeping the estimate between 30 and 40
+        # NOTE(review): the original comments described a 35kW cap, but the limit applied here has always been
+        # 40, which also means the 45 and 50 sizes can never be selected - confirm which limit is intended
+        available_sizes = [30, 35, 40, 45, 50]
+        estimated_size = min(max(estimated_size, 30), 40)
+
+        # Find the closest available size, where a tie goes to the smaller size
+        closest_size = min(available_sizes, key=lambda x: abs(x - estimated_size))
+
+        return closest_size
+
+    def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters):
+        """
+        Recommend a gas boiler upgrade, together with the heating controls that accompany it.
+
+        A boiler recommendation is only built when the current main heating efficiency is rated "Very Poor",
+        "Poor" or "Average". The results are added to self.recommendations:
+
+        - With a system change, every heating controls recommendation is combined with the boiler
+          installation into a single recommendation.
+        - Without a system change, the like-for-like boiler upgrade is recorded on its own and the heating
+          controls are added as separate recommendations, one phase later when a heating recommendation exists.
+
+        :param phase: The phase assigned to the boiler recommendation
+        :param system_change: Indicates if the property would be undergoing a heating system change. This could be true
+                              if the home didn't have a heating system in place, or if the home had electric heating
+                              previously
+        :param exising_room_heaters: Indicates if the property had room heaters previously - if so, a boiler
+                                     recommendation will need to be accompanied by removal of the room heaters
+        :return: None, the recommendations are appended to self.recommendations
+        """
+
+        recommendation_phase = phase
+
+        # We now recommend boiler upgrades, if applicable
+        simulation_config = {}
+        boiler_costs = {}
+        boiler_recommendation = {}
+        if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
+            boiler_size = self.estimate_boiler_size(
+                property_type=self.property.data["property-type"],
+                built_form=self.property.data["built-form"],
+                floor_area=self.property.floor_area,
+                floor_height=self.property.floor_height,
+                num_heated_rooms=self.property.data["number-heated-rooms"],
+            )
+
+            # We recommend a combi boiler under the following conditions
+            # 1) If there are 4 or fewer rooms (we don't use heated rooms because none of the rooms could be
+            #    heated if there is no existing heating system).
+            # 2) There are 1 or fewer bathrooms
+            # Otherwise, we recommend a gas condensing boiler, which will serve a larger property that has
+            # multiple bathrooms
+            is_combi = (
+                (self.property.number_of_rooms <= 4) and
+                (self.property.n_bathrooms in [None, 0, 1])
+            )
+            if is_combi:
+                description = "Upgrade to a new combi boiler"
+            else:
+                description = "Upgrade to a new gas condensing boiler"
+
+            simulation_config = {"mainheat_energy_eff_ending": "Good"}
+            if system_change:
+                # Installation of a boiler improves the hot water system so we need to reflect this in
+                # the outcome of the recommendation
+                heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
+                hotwater_ending_config = HotWaterAttributes("From main system").process()
+                fuel_ending_config = MainFuelAttributes("mains gas (not community)").process()
+
+                heating_simulation_config = check_simulation_difference(
+                    new_config=heating_ending_config, old_config=self.property.main_heating
+                )
+                hotwater_simulation_config = check_simulation_difference(
+                    new_config=hotwater_ending_config, old_config=self.property.hotwater
+                )
+                fuel_simulation_config = check_simulation_difference(
+                    new_config=fuel_ending_config, old_config=self.property.main_fuel
+                )
+
+                simulation_config = {
+                    **simulation_config,
+                    **heating_simulation_config,
+                    **hotwater_simulation_config,
+                    **fuel_simulation_config,
+                    "hot_water_energy_eff_ending": "Good"
+                }
+
+            boiler_costs = self.costs.boiler(
+                is_combi=is_combi,
+                size=f"{boiler_size}kw",
+                exising_room_heaters=exising_room_heaters,
+                system_change=system_change,
+                n_heated_rooms=self.property.data["number-heated-rooms"],
+                n_rooms=self.property.number_of_rooms
+            )
+
+            already_installed = "heating" in self.property.already_installed
+            if already_installed:
+                boiler_costs = override_costs(boiler_costs)
+                description = "Heating system has already been upgraded, no further action needed."
+
+            boiler_recommendation = {
+                "phase": recommendation_phase,
+                "parts": [
+                    # TODO
+                ],
+                "type": "heating",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "already_installed": already_installed,
+                "simulation_config": simulation_config,
+                **boiler_costs
+            }
+
+        # We recommend the heating controls
+        controls_recommender = HeatingControlRecommender(self.property)
+        controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
+        # We may have 2 recommendations from the heating controls
+
+        if boiler_recommendation and not system_change:
+            # A like-for-like upgrade stands on its own, so we record it here - otherwise it would be built
+            # and then discarded. With a system change the boiler is instead folded into the combined
+            # recommendations below
+            self.recommendations.append(boiler_recommendation)
+
+        if not controls_recommender.recommendation:
+            return
+
+        if system_change:
+            if not boiler_recommendation:
+                # No boiler upgrade was built (the existing heating is already rated above "Average"), so
+                # there is nothing to combine the heating controls with
+                return
+
+            # We combine the heating and controls recommendations, in the case of a system change
+            combined_recommendations = []
+            for controls_recommendation in controls_recommender.recommendation:
+                combined_recommendation = self.combine_heating_and_controls(
+                    controls_recommendations=[controls_recommendation],
+                    heating_simulation_config=simulation_config,
+                    costs=boiler_costs,
+                    description=boiler_recommendation["description"],
+                    phase=recommendation_phase,
+                    heating_controls_only=False,
+                    system_change=True
+                )
+                combined_recommendations.extend(combined_recommendation)
+
+            # Each combined recommendation covers both the boiler and one heating controls option
+            self.recommendations.extend(combined_recommendations)
+        else:
+            # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
+            # but we'll only increment if we have a heating recommendation
+            has_heating_recommendation = any(
+                recommendation["type"] == "heating" for recommendation in self.recommendations
+            )
+            if has_heating_recommendation:
+                recommendation_phase += 1
+            # The heating controls recommendation is distinct from the boiler upgrade recommendation
+            # We insert phase into the recommendations for heating controls
+            for recommendation in controls_recommender.recommendation:
+                recommendation["phase"] = recommendation_phase
+
+            self.recommendations.extend(controls_recommender.recommendation)
+
+        return
--- a/Show more
+++ b/Show more