fixed merge conflict with refactor

2026-07-27 23:35:01 +00:00 · 2024-01-16 16:57:45 +00:00 · 2024-01-16 16:57:45 +00:00 · 90ba851cc0
commit 90ba851cc0
parent b82fa4cef7 8904467788
57 changed files with 5589 additions and 419 deletions
--- a/.gitignore
+++ b/.gitignore
@ -265,4 +265,7 @@ model_data/simulation_system/predictions/
 .idea/misc.iml

 adhoc
-adhoc/*
+adhoc/*
+
+etl-router-venv/
+refactor_datasets/
--- a/backend/DbClient.py
+++ b/backend/DbClient.py
@ -0,0 +1,7 @@
+class DbClient:
+    """Placeholder for the component that will talk to the database."""
+
+    def __init__(self):
+        # No connection or state is set up yet; the class is currently
+        # an empty shell.
+        return None
--- a/backend/OrdnanceSurvey.py
+++ b/backend/OrdnanceSurvey.py
@ -0,0 +1,105 @@
+from functools import lru_cache
+import urllib.parse
+import requests
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class OrdnanceSuveyClient:
+    """
+    Client for the Ordnance Survey Places "find" endpoint.
+
+    Given an address and postcode, it looks up the best matching record and exposes the cleaned address,
+    postcode, property type and built form of that record as attributes.
+    """
+
+    # Seconds to wait for the Places API before giving up, so a stalled request cannot hang the caller
+    REQUEST_TIMEOUT_SECONDS = 10
+
+    def __init__(self, address, postcode, api_key):
+        """
+        This class is tasked with interaction with the ordnance survey API.
+        :param address: The address for the property to search for
+        :param postcode: The postcode for the property to search for
+        :param api_key: The Ordnance Survey API key, sent with every request
+        """
+
+        self.address = address
+        self.postcode = postcode
+        self.full_address = ", ".join([self.address, self.postcode])
+        self.api_key = api_key
+
+        self.results = None
+
+        self.most_relevant_result = None
+        self.property_type = None
+        self.built_form = None
+        # This will be postcode and address, as returned by the ordnance survey
+        self.address_os = None
+        self.postcode_os = None
+
+        # Status of the last lookup that found a match, so repeated calls on this instance reuse it
+        self._places_status = None
+
+    def set_places_address(self):
+        """
+        Given a response from the places api, this function will set the address and postcode of the property
+        :raises ValueError: if no result has been stored by get_places_api yet
+        """
+
+        if self.most_relevant_result is None:
+            raise ValueError("No results found - run get_places_api first")
+
+        self.address_os = self.most_relevant_result["ADDRESS"]
+        self.postcode_os = self.most_relevant_result["POSTCODE"]
+        # We strip out the postcode from the address as this is already stored separately
+        self.address_os = self.address_os.replace(self.postcode_os, "").strip()
+        # Remove trailing comma
+        self.address_os = self.address_os.rstrip(",").strip()
+        # Convert to title case
+        self.address_os = self.address_os.title()
+        # Make sure postcode is upper case
+        self.postcode_os = self.postcode_os.upper()
+
+    def get_places_api(self):
+        """
+        This method queries the Ordnance Survey places api for the full address and stores the best match.
+
+        On a successful response with at least one result, the first result is stored in most_relevant_result
+        and the property type, built form, address and postcode attributes are populated from it.
+        :return: dictionary with a single "status" key holding the HTTP status code of the response
+        :raises ValueError: if no API key was provided
+        """
+
+        if not self.api_key:
+            raise ValueError("Ordnance Survey API key not specified")
+
+        # The outcome is memoised on the instance rather than with functools.lru_cache: caching a method keys
+        # on self and keeps every client alive for the lifetime of the cache
+        cached_status = getattr(self, "_places_status", None)
+        if cached_status is not None:
+            return cached_status
+
+        encoded_address_query = urllib.parse.quote(self.full_address)
+        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
+               f"{self.api_key}")
+        response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
+        status = {"status": response.status_code}
+
+        if response.status_code != 200:
+            logger.info("Could not find any results for the provided address and postcode")
+            return status
+
+        # A successful response may carry no "results" entry, or an empty one, when nothing matched
+        results = response.json().get("results") or []
+        self.results = results
+        if not results:
+            logger.info("Could not find any results for the provided address and postcode")
+            return status
+
+        # Extract some details about the best match
+        self.most_relevant_result = results[0]["DPA"]
+
+        self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
+        self.set_places_address()
+
+        self._places_status = status
+        return status
+
+    def parse_classification_code(self, classification_code: str):
+        """
+        This function will convert the classification code, returned by the OS places api, to a property type that is
+        compatible with the EPC database.
+
+        The various classifications can be found here:
+        https://osdatahub.os.uk/docs/places/technicalSpecification
+
+        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of classifications
+        For these purposes, we do not need the full classification as this includes non-residential properties. We only
+        parse the ones of interest to us. Codes that are not mapped set both attributes to an empty string.
+        :param classification_code: the CLASSIFICATION_CODE value of the matched record
+        :return:
+        """
+
+        value_map = {
+            # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
+            'RD': {},
+            'RD02': {'property_type': 'House', 'built_form': 'Detached'},
+            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
+            'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
+            'RD06': {'property_type': 'Flat'},
+        }
+
+        mapped = value_map.get(classification_code, {})
+        self.property_type = mapped.get("property_type", "")
+        self.built_form = mapped.get("built_form", "")
--- a/backend/Property.py
+++ b/backend/Property.py
@ -9,18 +9,17 @@ from etl.epc.DataProcessor import EPCDataProcessor
 from etl.epc.Dataset import TrainingDataset
 from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_REMAP
 from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet
-from epc_api.client import EpcClient
 from BaseUtility import Definitions
 from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP
 from recommendations.recommendation_utils import (
-    estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area
+    estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
 )


 ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
-EPC_AUTH_TOKEN = os.environ.get('EPC_AUTH_TOKEN')
 DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)

 logger = setup_logger()
@ -51,13 +50,14 @@ class Property(Definitions):

    spatial = None

-    def __init__(self, id, postcode, address1, epc_record, data=None):
+    def __init__(self, id, postcode, address, epc_record, data=None):

        self.epc_record = epc_record

        self.id = id
+
+        self.address = address
        self.postcode = postcode
-        self.address1 = address1
        self.data = {k.replace("_", "-"): v for k,v in epc_record.get("prepared_epc").items()}
        self.old_data = epc_record.get("old_data")
        self.property_dimensions = None
@ -112,6 +112,9 @@ class Property(Definitions):
        self.insulation_floor_area = None
        self.number_lighting_outlets = epc_record.prepared_epc.get("fixed_lighting_outlets_count")
        self.floor_level = None
+        self.number_of_windows = None
+        self.solar_pv_roof_area = None
+        self.solar_pv_percentage = None

        self.current_adjusted_energy = None
        self.expected_adjusted_energy = None
@ -177,81 +180,51 @@ class Property(Definitions):
            recommendation_record["walls_insulation_thickness_ending"] = "above average"
            recommendation_record["walls_energy_eff_ending"] = "Good"
        else:
-            if recommendation_record["walls_thermal_transmittance_ending"] is None:
-                raise ValueError("We should not have a None value for the u value")
+            wind_turbine_count = int(wind_turbine_count)

-            if recommendation_record["walls_insulation_thickness_ending"] is None:
-                recommendation_record["walls_insulation_thickness_ending"] = "none"
+        self.wind_turbine = {
+            "wind_turbine": wind_turbine_count,
+        }

-        # Update description to indicate it's insulate
-        if recommendation["type"] in ["solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"]:
-            if len(recommendation["parts"]) > 1:
-                raise NotImplementedError("Have more than 1 floor insulation part - handle this case")
+    def set_count_variables(self):

-            recommendation_record["floor_thermal_transmittance_ending"] = recommendation["new_u_value"]
-            # We don't really see above average for this in the training data
-            recommendation_record["floor_insulation_thickness_ending"] = "average"
-            recommendation_record["floor_energy_eff_ending"] = "Good"
-        else:
-            if recommendation_record["floor_thermal_transmittance_ending"] is None:
-                raise ValueError("We should not have a None value for the u value")
+        """
+        For EPC fields that are just counts, we'll set them here
+        These are fields that are integers but may contain additional values such as "" so we can't do a direct
+        conversion straight to an integer
+
+        Each attribute is read from its own EPC field. Missing values (None, "" or a data anomaly marker) become
+        None for the storey and room counts, and 0 for the other counts
+        :return:
+        """

-            if recommendation_record["floor_insulation_thickness_ending"] is None:
-                recommendation_record["floor_insulation_thickness_ending"] = "none"
+        fields = {
+            "number_of_open_fireplaces": "number-open-fireplaces",
+            "number_of_extensions": "extension-count",
+            "number_of_storeys": "flat-storey-count",
+            "number_of_rooms": "number-habitable-rooms",
+        }

-        if recommendation["type"] in ["loft_insulation", "room_roof_insulation", "flat_roof_insulation"]:
-            recommendation_record["roof_thermal_transmittance_ending"] = recommendation["new_u_value"]
+        null_attributes = ["number_of_storeys", "number_of_rooms"]

-            parts = recommendation["parts"]
-            if len(parts) != 1:
-                raise ValueError("More than one part for roof insulation - investiage me")
+        for attribute, epc_field in fields.items():
+            # Read the EPC field mapped to this attribute, not a fixed field, so each count gets its own value
+            value = self.data[epc_field]
+            if value is None or value == "" or value in self.DATA_ANOMALY_MATCHES:
+                if attribute in null_attributes:
+                    value = None
+                else:
+                    value = 0
+            else:
+                value = int(value)

-            # This is based on the values we have in the training data
-            valid_numeric_values = [
-                12, 25, 50, 75, 100, 150, 200, 250, 270, 300, 350, 400
-            ]
+            setattr(self, attribute, value)

-            proposed_depth = int(parts[0]["depth"])
-            if proposed_depth not in valid_numeric_values:
-                # Take the nearest value for scoring
-                proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
-
-            recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
-            recommendation_record["roof_energy_eff_ending"] = "Very Good"
-        else:
-            # Fill missing roof u-values - this fill is not based on recommended upgrades
-            if recommendation_record["roof_thermal_transmittance_ending"] is None:
-                raise ValueError("We should not have a None value for the u value")
-
-            if recommendation_record["roof_insulation_thickness_ending"] is None:
-                recommendation_record["roof_insulation_thickness_ending"] = "none"
-
-        if recommendation["type"] == "mechanical_ventilation":
-            recommendation_record["mechanical_ventilation_ending"] = 'mechanical, extract only'
-
-        if recommendation["type"] == "sealing_open_fireplace":
-            recommendation_record["number_open_fireplaces_ending"] = 0
-
-        if recommendation["type"] == "low_energy_lighting":
-            recommendation_record["low_energy_lighting_ending"] = 100
-            recommendation_record["lighting_energy_eff_starting"] = "Very Good"
-
-        if recommendation["type"] not in [
-            "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
-            "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
-            "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
-            "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
-        ]:
-            raise NotImplementedError("Implement me")
-
-        return recommendation_record
-
-
-    def get_components(self, cleaned):
+    def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds):
        """
        Given the cleaning that has been performed, we'll use this to identify the property
        components, from roof to walls to windows, heating and hot water
        :param cleaned: This is the dictionary of components found in cleaner.cleaned
+        :param photo_supply_lookup: This is the lookup table for the photo supply, used to estimate the percentage
+                                    of the roof that is suitable for solar panels
+        :param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
+                                             solar pv roof area
        :return:
        """

@ -301,6 +274,10 @@ class Property(Definitions):
        self.set_wall_type()
        self.set_floor_type()
        self.set_floor_level()
+        self.set_windows_count()
+        self.set_solar_panel_area(
+            photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds
+        )

    def set_spatial(self, spatial: pd.DataFrame):
        """
@ -368,7 +345,7 @@ class Property(Definitions):
        """
        Utility function for usage in the lambda, for preparing the _rating fields
        """
-        return rating_lookup[field].value if field not in cls.DATA_ANOMALY_MATCHES else None
+        return rating_lookup[field].value if (field not in cls.DATA_ANOMALY_MATCHES) and (field is not None) else None

    def get_property_details_epc(self, portfolio_id: int, rating_lookup):

@ -409,6 +386,7 @@ class Property(Definitions):
            "primary_energy_consumption": self.energy["primary_energy_consumption"],
            "co2_emissions": self.energy["co2_emissions"],
            "adjusted_energy_consumption": self.current_adjusted_energy,
+            "estimated": self.data.get("estimated", False)
        }

        return property_details_epc
@ -664,7 +642,7 @@ class Property(Definitions):
        :return:
        """

-        if self.data["fixed-lighting-outlets-count"] == "":
+        if self.data["fixed-lighting-outlets-count"] in [None, ""]:

            # We check old EPCs and the full SAP EPC

@ -693,3 +671,52 @@ class Property(Definitions):
        """
        self.current_adjusted_energy = current_adjusted_energy
        self.expected_adjusted_energy = expected_adjusted_energy
+
+    def set_windows_count(self):
+        """
+        Estimate how many windows the property has and store the result on number_of_windows.
+
+        The estimate is delegated to estimate_windows, fed from the EPC data and from attributes already
+        set on the property.
+        :return:
+        """
+
+        epc = self.data
+        estimate_inputs = {
+            "property_type": epc["property-type"],
+            "built_form": epc["built-form"],
+            "construction_age_band": self.construction_age_band,
+            "floor_area": self.floor_area,
+            "number_habitable_rooms": self.number_of_rooms,
+            "extension_count": float(epc["extension-count"]),
+        }
+        self.number_of_windows = estimate_windows(**estimate_inputs)
+
+    def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
+        """
+        Sets the approximate area of the solar panels, and the share of the roof that area represents.
+
+        The share is the mean of "photo_supply_median" over the lookup rows matched to this property, divided
+        by 100. It is applied to the insulation floor area for a flat roof, and to the pitched roof area
+        otherwise.
+        :param photo_supply_lookup: lookup table passed to SolarPhotoSupply.filter_photo_supply_lookup
+        :param floor_area_decile_thresholds: floor area decile thresholds passed to the same filter
+        :raises ValueError: if the roof area needed for this roof type has not been set
+        :return:
+        """
+
+        if (self.insulation_floor_area is None) and (self.pitched_roof_area is None):
+            raise ValueError(
+                "Need to set insulation floor area and pitched roof area before setting solar pv roof area"
+            )
+
+        # Only one of the two areas is used, depending on the roof type, so check the one that is needed
+        # rather than failing later with a TypeError on None * float
+        is_flat = self.roof["is_flat"]
+        roof_area = self.insulation_floor_area if is_flat else self.pitched_roof_area
+        if roof_area is None:
+            missing = "insulation floor area" if is_flat else "pitched roof area"
+            raise ValueError(f"Need to set {missing} before setting solar pv roof area")
+
+        photo_supply_matched = SolarPhotoSupply.filter_photo_supply_lookup(
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds,
+            tenure=self.data["tenure"],
+            built_form=self.data["built-form"],
+            property_type=self.data["property-type"],
+            construction_age_band=self.construction_age_band,
+            is_flat=is_flat,
+            is_pitched=self.roof["is_pitched"],
+            is_roof_room=self.roof["is_roof_room"],
+            floor_area=self.floor_area
+        )
+
+        # The lookup holds percentages, so convert the mean to a fraction before applying it
+        percentage_of_roof = photo_supply_matched["photo_supply_median"].mean()
+        percentage_of_roof = percentage_of_roof / 100
+
+        self.solar_pv_roof_area = roof_area * percentage_of_roof
+
+        self.solar_pv_percentage = percentage_of_roof
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@ -1,12 +1,114 @@
 import os
 import time
+import re
+
+import usaddress
+import pandas as pd
+import numpy as np
 from epc_api.client import EpcClient
+from backend.OrdnanceSurvey import OrdnanceSuveyClient
+from BaseUtility import Definitions
 from utils.logger import setup_logger
 from typing import List
 from fuzzywuzzy import process

 logger = setup_logger()

+# Target dtype for each EPC field, keyed by the hyphenated EPC field name. Values are dtype strings
+# ("Int64", "float" or "str"). Commented-out entries have no dtype assigned here.
+# NOTE(review): presumably consumed by a pandas astype-style cast - the usage is outside this view, confirm.
+vartypes = {
+    'low-energy-fixed-light-count': "Int64",
+    # 'address': 'str',
+    # 'uprn-source': 'str',
+    'floor-height': 'float',
+    'heating-cost-potential': 'float',
+    'unheated-corridor-length': 'float',
+    'hot-water-cost-potential': 'float',
+    'construction-age-band': 'str',
+    'potential-energy-rating': 'str',
+    'mainheat-energy-eff': 'str',
+    'windows-env-eff': 'str',
+    'lighting-energy-eff': 'str',
+    'environment-impact-potential': "Int64",
+    'glazed-type': 'str',
+    'heating-cost-current': 'float',
+    'address3': 'str',
+    'mainheatcont-description': 'str',
+    'sheating-energy-eff': 'str',
+    'property-type': 'str',
+    'local-authority-label': 'str',
+    'fixed-lighting-outlets-count': "Int64",
+    'energy-tariff': 'str',
+    'mechanical-ventilation': 'str',
+    # NOTE(review): every other cost field in this mapping is 'float'; 'str' here looks unintended - confirm
+    'hot-water-cost-current': 'str',
+    'county': 'str',
+    'postcode': 'str',
+    'solar-water-heating-flag': 'str',
+    'constituency': 'str',
+    'co2-emissions-potential': 'float',
+    'number-heated-rooms': 'float',
+    'floor-description': 'str',
+    'energy-consumption-potential': 'float',
+    'local-authority': 'str',
+    'built-form': 'str',
+    'number-open-fireplaces': "Int64",
+    'windows-description': 'str',
+    'glazed-area': 'str',
+    # 'inspection-date': str,
+    'mains-gas-flag': 'str',
+    'co2-emiss-curr-per-floor-area': 'float',
+    'address1': 'str',
+    'heat-loss-corridor': 'str',
+    'flat-storey-count': "Int64",
+    'constituency-label': 'str',
+    'roof-energy-eff': 'str',
+    'total-floor-area': 'float',
+    'building-reference-number': 'str',
+    'environment-impact-current': 'float',
+    'co2-emissions-current': 'float',
+    'roof-description': 'str',
+    'floor-energy-eff': 'str',
+    'number-habitable-rooms': 'float',
+    'address2': 'str',
+    'hot-water-env-eff': 'str',
+    'posttown': 'str',
+    'mainheatc-energy-eff': 'str',
+    'main-fuel': 'str',
+    'lighting-env-eff': 'str',
+    'windows-energy-eff': 'str',
+    'floor-env-eff': 'str',
+    'sheating-env-eff': 'str',
+    'lighting-description': 'str',
+    'roof-env-eff': 'str',
+    'walls-energy-eff': 'str',
+    'photo-supply': 'float',
+    'lighting-cost-potential': 'float',
+    'mainheat-env-eff': 'str',
+    'multi-glaze-proportion': 'float',
+    'main-heating-controls': 'str',
+    # 'lodgement-datetime',
+    'flat-top-storey': 'str',
+    'current-energy-rating': 'str',
+    'secondheat-description': 'str',
+    'walls-env-eff': 'str',
+    'transaction-type': 'str',
+    # 'uprn': "Int64",
+    'current-energy-efficiency': 'float',
+    'energy-consumption-current': 'float',
+    'mainheat-description': 'str',
+    'lighting-cost-current': 'float',
+    # 'lodgement-date',
+    'extension-count': "Int64",
+    'mainheatc-env-eff': 'str',
+    'lmk-key': 'str',
+    'wind-turbine-count': "Int64",
+    'tenure': 'str',
+    'floor-level': 'str',
+    'potential-energy-efficiency': "Int64",
+    'hot-water-energy-eff': 'str',
+    'low-energy-lighting': 'float',
+    'walls-description': 'str',
+    'hotwater-description': 'str'
+}
+

 class SearchEpc:
    """
@ -38,53 +140,127 @@ class SearchEpc:
        self,
        address1: str,
        postcode: str,
-        address2: str = None,
-        address3: str = None,
-        address4: str = None,
-        max_retries: int = None
+        auth_token: str,
+        os_api_key: str,
+        full_address: str | None = None,
+        max_retries: int = None,
+        uprn: [int, None] = None,
+        size=None,
+        property_type=None,
    ):
        """
        Address lines 1 and postcode are mandatory fields. The other address lines are optional
        but can be used to find the epc for the home, if address1 and postcode are insufficient
        :param address1: string, propery's address line 1
        :param postcode: string, propery's postcode
-        :param address2: string, optional, propery's address line 2
-        :param address3: string, optional, propery's address line 3
-        :param address4: string, optional, propery's address line 4
+        :param full_address: string, optional parameter, the full address of the property
+        :param max_retries: int, optional, number of retries to make when searching the api
+        :param uprn: int, optional, the uprn of the property
+        :param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's
+                        default
+        :param property_type: str, optional, the property type of the property, if known before hand
        """

        self.address1 = address1
        self.postcode = postcode
-        self.address2 = address2
-        self.address3 = address3
-        self.address4 = address4
+        self.full_address = full_address
+        self.uprn = uprn
+        self.house_number = self.get_house_number(self.address1)
+        self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)

        self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES

-        self.client = EpcClient(auth_token=os.getenv("EPC_AUTH_TOKEN"))
+        self.client = EpcClient(auth_token=auth_token)
+        self.ordnance_survey_client = OrdnanceSuveyClient(
+            address=self.address1, postcode=self.postcode, api_key=os_api_key
+        )

        self.data = None
+        self.newest_epc = None
+        self.older_epcs = None
+        self.full_sap_epc = None

-    def search(self):
+        # These are the address and postcode values, which we store in the database
+        self.address_clean = None
+        self.postcode_clean = None
+
+        self.size = size if size is not None else 25
+
+        self.property_type = property_type
+
+    @classmethod
+    def get_house_number(cls, address: str) -> str | None:
+        """
+        Extract the house number from a free-text address.
+
+        The usaddress parser is tried first. When it labels no token as an address number, a regular
+        expression looks for a number following 'Flat' or 'Apartment', or a number at the very start.
+        :param address: the address to parse
+        :return: the house number as a string, or None when none could be found
+        """
+
+        labelled_number = next(
+            (token for token, label in usaddress.parse(address) if label == "AddressNumber"), None
+        )
+
+        if labelled_number is not None:
+            # Drop any commas the parser left attached to the token
+            return labelled_number.replace(",", "")
+
+        # usaddress isn't optimal for addresses with prefixes such as 'Flat', so fall back to a custom pattern
+        fallback = re.search(r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)', address)
+        if fallback is None:
+            return None
+
+        # Exactly one of the two groups is populated by a match; return whichever it is
+        flat_number, leading_number = fallback.groups()
+        return flat_number if flat_number is not None else leading_number
+
+    @staticmethod
+    def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
+        """
+        Return the first run of digits found in a house number, as an integer.
+        :param house_number: house number such as "12A", or None
+        :return: the numeric part as an int, or None when the input is None or contains no digits
+        """
+
+        digits = re.search(r'\d+', house_number) if house_number is not None else None
+        return int(digits.group()) if digits else None
+
+    def get_epc(self, params=None, size=None):
        # Get the EPC data with retries
+        size = size if size is not None else self.size
+        if params is None:
+            if self.uprn:
+                params = {"uprn": self.uprn}
+            else:
+                params = {"address": self.address1, "postcode": self.postcode}

        for retry in range(self.max_retries):
            try:
-                response = self.client.domestic.search(
-                    params={"address": self.address1, "postcode": self.postcode}
-                )
+
+                if "uprn" in params:
+                    # We use the direct call method inside, since we need to implement uprn as a valid
+                    # parameter for the search function
+                    url = os.path.join(self.client.domestic.host, "search")
+                    response = self.client.domestic.call(method="get", url=url, params=params)
+                else:
+                    response = self.client.domestic.search(params=params, size=size)

                if response:
                    self.data = response
                    return self.SUCCESS

                if retry > 0:
-                    print("Failed previous attempt but retry successful")
+                    logger.info("Failed previous attempt but retry successful")
                # If we got nothing, final try
                if not response:
-                    # TODO: Make a call to OS uprn service and get the address' uprn, just in case there is an
-                    #       issue with how we are searching the api
-
                    return {
                        "status": 204,
                        "message": "no data",
@ -127,7 +303,6 @@ class SearchEpc:
        if len(uprns) == 1:
            return rows

-        logger.error("Multiple UPRNS found - we should use an alternate method of searching - TODO")
        if property_type is not None:
            # We can do a filter on the property type
            rows_filtered = [r for r in rows if r["property-type"] == property_type]
@ -147,7 +322,24 @@ class SearchEpc:

            return rows

-    def retrieve(self, property_type=None, address=None):
+    @staticmethod
+    def format_address(newest_epc):
+        """
+        Build the address and postcode, as stored in the database, from an EPC record.
+
+        The postcode is removed from the address, which is then stripped of surrounding whitespace and any
+        trailing comma and converted to title case. The postcode is upper-cased.
+        :param newest_epc: EPC record with "address" and "postcode" keys
+        :return: tuple of (address, postcode)
+        """
+        raw_postcode = newest_epc["postcode"]
+        without_postcode = newest_epc["address"].replace(raw_postcode, "")
+        tidy_address = without_postcode.strip().rstrip(",").strip().title()
+
+        return tidy_address, raw_postcode.upper()
+
+    def extract_epc_data(self, address=None):

        """
        Given a successful search, this method will format the data and return it
@ -163,7 +355,7 @@ class SearchEpc:
        # Firstly, we should only have 1 urpn so if we have multiple, we'll need to filter down the
        # property further

-        rows = self.filter_rows(rows, property_type=property_type, address=None)
+        rows = self.filter_rows(rows, property_type=self.property_type, address=None)
        rows = self.filter_rows(rows, property_type=None, address=address)

        # We now check for a full sap epc:
@ -173,7 +365,26 @@ class SearchEpc:
        # Finally, we identify the newest epc and the rest, and then return
        newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)

-        return newest_epc, older_epcs, full_sap_epc
+        # Retrieve postcode and address
+        address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
+
+        # Ge the uprn from the newest record for this home
+        uprns = {r["uprn"] for r in rows if r["uprn"]}
+        # We can sometimes have no uprn for a property
+        if (len(uprns) == 0) and len(rows) > 0:
+            logger.warning("Found data but missing uprn")
+        elif len(uprns) != 1:
+            # There is a possibility that we have multiple UPRNs for a single property, which is an error
+            addresses = {r["address"] for r in rows}
+            if len(addresses) == 1:
+                # Take the uprn from the most recent
+                uprns = {newest_epc["uprn"]}
+            else:
+                raise ValueError("Multiple UPRNs found - investigate me")
+
+        uprn = uprns.pop() if uprns else None
+
+        return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn

    @staticmethod
    def filter_newest_epc(list_of_epcs: List):
@ -186,8 +397,334 @@ class SearchEpc:
            return {}, []

        if len(newest_response) != 1:
-            raise Exception("More than one result found for this address - investigate me")
+            # It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs that
+            # were lodged at the exact same time. In this case, we will take the first one
+            newest_response = [newest_response[0]]

        older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_response[0]["lmk-key"]]

        return newest_response[0], older_epcs
+
+    @staticmethod
+    def _get_epc_mode(col: str, epc_data: pd.DataFrame):
+        """
+        Simple method to extract the mode value from the EPC data
+        :param col: name of the column to take the mode of
+        :param epc_data: pandas dataframe of epc data
+        :return: the single most frequent non-null value found in the column
+        :raises NotImplementedError: if the mode lookup does not yield exactly one row
+        """
+
+        # Select with a list so that we get a dataframe back; mode() then returns one row per modal value
+        mode_value = epc_data[[col]].mode(dropna=True)
+        if len(mode_value) != 1:
+            # NOTE(review): an empty result (e.g. a column that is entirely null) presumably lands here too,
+            # although the message only mentions multiple modes - confirm
+            raise NotImplementedError("TODO: Handle multiple modes")
+        mode_value = mode_value.iloc[0][col]
+
+        return mode_value
+
+    def fetch_nearby_epcs(
+        self, initial_postcode: str,
+        lmks_to_drop: list[str] | None = None,
+        built_form: str = "",
+        property_type: str = ""
+    ):
+        """
+        Fetches and processes EPC data for a given initial postcode, applying successive trimming
+        to the postcode and filtering the data until a non-empty result set is found.
+
+        The function queries the EPC API with the provided postcode, and if no data is found or
+        if nothing is left after filtering, it progressively shortens the postcode by
+        removing the last character and retries the query. This process continues until a valid
+        set of EPC data is obtained or the postcode is exhausted.
+
+        Additional filtering is applied to the obtained EPC data based on 'lmk-key', 'built-form',
+        and 'property-type'. The data is also processed to extract and numerically interpret house
+        numbers, calculate house number distances, and apply weights based on these distances.
+
+        :param initial_postcode: The initial full postcode for the EPC data query.
+        :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
+        :param built_form: The 'built-form' value to be used for filtering the EPC data. If empty or null,
+                           the built form is estimated from the retrieved data.
+        :param property_type: The 'property-type' value to be used for filtering the EPC data. If empty,
+                              the property type is estimated from the retrieved data.
+        :return: pandas dataframe of the filtered EPC records (the most recent record per uprn), including
+                 a "weight" column to be used when estimating values from these records
+        :raises KeyError: if a non-empty property_type is provided that is not in the api map below
+        :raises Exception: if no data could be found, even after trimming the postcode down to nothing
+        """
+
+        # Maps the property type as we hold it onto the value that is sent as the "property-type"
+        # search parameter
+        property_type_api_map = {
+            "Bungalow": "bungalow",
+            "Flat": "flat",
+            "House": "house",
+            "Maisonette": "maisonette",
+            "Park home": "park home",
+        }
+
+        postcode = initial_postcode
+        while postcode:
+            # Fetch data from EPC API
+            params = {"postcode": postcode}
+            if property_type:
+                params["property-type"] = property_type_api_map[property_type]
+
+            # We limit the request with size=100 for the relevant property type, so not to pull in too many
+            # irrelevant homes
+            # NOTE(review): this comment previously said "20 nearest homes" - confirm the intended size
+            epc_response = self.get_epc(params=params, size=100)
+
+            if epc_response["status"] == 200:
+                # The rows are read from self.data, rather than from the response
+                # NOTE(review): presumably get_epc stores the retrieved data on self.data - confirm
+                epc_data = pd.DataFrame(self.data["rows"])
+
+                if lmks_to_drop is not None:
+                    epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]
+
+                if not epc_data.empty:
+                    # Further processing of the EPC data
+                    epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], format='mixed')
+                    # Keep only the most recently lodged record for each uprn
+                    epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
+                    epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
+                    epc_data["numeric_house_number"] = epc_data["house_number"].apply(
+                        lambda house_num: self.extract_numeric_housenumber_part(house_num)
+                    )
+
+                    if self.numeric_house_number is None:
+                        # If we don't have a house number, we treat all weights as equal
+                        epc_data["weight"] = 1
+                    else:
+                        epc_data["house_number_distance"] = abs(
+                            epc_data["numeric_house_number"] - self.numeric_house_number
+                        )
+                        # Previous weighting approach, kept for reference:
+                        # # We add 1, just in case we have a 0 weight (e.g. comparing house number 7a to 7b, or 9A to 9)
+                        # epc_data["weight"] = 1 / (epc_data["house_number_distance"] + 1)
+                        # # If we have a home without a house number, fill that weight with average
+                        # epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
+                        # # Finally, we might not have any house numbers whatsoever so everything could be
+                        # # missing, so we fill with 1
+                        # epc_data["weight"] = epc_data["weight"].fillna(1)
+                        # TODO: Testing
+                        # If the postcode is different from the initial postcode, it doesn't make sense to have
+                        # any weightings
+                        if all(pd.isnull(epc_data["house_number_distance"])) or (postcode != initial_postcode):
+                            epc_data["weight"] = 1
+                        else:
+                            # The weight decays with the square root of the house number distance. We add 1 so
+                            # that a distance of 0 gives a weight of 1, and homes without a distance are given
+                            # the average weight
+                            epc_data["weight"] = 1 / np.sqrt(epc_data["house_number_distance"] + 1)
+                            epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
+
+                    # If we weren't given a property type, we take the highest weighted one from the data
+                    estimation_property_type = self._estimate_str(
+                        key="property-type", estimation_data=epc_data
+                    ) if property_type == "" else property_type
+
+                    # The highest weighted built form, amongst the homes of the chosen property type
+                    epc_built_form = self._estimate_str(
+                        key="built-form",
+                        estimation_data=epc_data[epc_data["property-type"] == estimation_property_type]
+                    )
+
+                    # A semi-detached home, where the nearby homes are mostly terraced, is treated as end-terraced.
+                    # Otherwise, we use the built form that we were given, falling back to the estimated one
+                    if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]:
+                        estimation_built_form = "End-Terraced"
+                    elif (built_form == "") or (pd.isnull(built_form)):
+                        estimation_built_form = epc_built_form
+                    else:
+                        estimation_built_form = built_form
+
+                    # We handle some edge cases experienced with maisonettes - if built form is detached or
+                    # semi-detached, just filter on maisonette
+                    # We also add some additional logic for Park homes, because they are far less common than other
+                    # property types
+
+                    is_maisonette_with_bad_built_form = (estimation_property_type == "Maisonette") & (
+                        estimation_built_form in ["Detached", "Semi-Detached"]
+                    )
+
+                    is_park_home_without_built_form = (estimation_property_type == "Park home") & (
+                        sum(epc_data["built-form"] == estimation_built_form) == 0
+                    )
+
+                    has_missing_built_form = not estimation_built_form
+
+                    # In these edge cases we filter on property type alone, otherwise on both property type
+                    # and built form
+                    if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
+                        epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
+                    else:
+                        epc_data = epc_data[
+                            (epc_data["built-form"] == estimation_built_form) & (
+                                epc_data["property-type"] == estimation_property_type)
+                            ]
+
+                    if not epc_data.empty:
+                        return epc_data  # Return the filtered data if it's not empty
+
+            # Shorten the postcode by one character for the next iteration. The rstrip removes the space
+            # that is left trailing, once the second part of the postcode has been trimmed away
+            postcode = postcode[:-1].rstrip()
+
+        # If loop finishes without a valid response, raise an exception
+        raise Exception("Unable to find postcode data after trimming - investigate me")
+
+    def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
+        """
+        For a property that does not have an EPC, we retrieve the EPC data for the closest properties
+        and estimate the EPC for the property in question.
+
+        Note - do we have postcodes with just a single address? We would need to use a different approach
+        to find the closest homes
+        :param property_type:   This is the property type of the property we are estimating, that can be retrieved from
+                                the ordnance survey api
+        :param built_form:      This is the built form of the property we are estimating, that can be retrieved from
+                                the ordnance survey api
+        :param lmks_to_drop:    This is a list of LMK keys that should be dropped from the estimation process. This
+                                is used as an override for testing, to drop EPCs for the property we are testing
+        :return:                A dictionary with an estimated value for every key in vartypes, along with
+                                "lodgement-datetime", "lodgement-date", "postcode", "uprn", "address" and an
+                                "estimated" flag, which is always True
+        """
+
+        # From the ordnance survey data, we want to determine the property type and then use only similar property
+        # types for the estimation process
+        epc_data = self.fetch_nearby_epcs(
+            initial_postcode=self.postcode,
+            lmks_to_drop=lmks_to_drop,
+            built_form=built_form,
+            property_type=property_type
+        )
+
+        # For each attribute, we need to determine the datatype and use an appropriate method
+        # to estimate.
+        # Note - vartypes is defined outside of this method and maps each epc key to "Int64", "float" or "str"
+        estimated_epc = {}
+        for key, vartype in vartypes.items():
+            # Treat nulls and empty strings as missing, and drop them, along with any known anomaly values
+            epc_data[key] = np.where(pd.isnull(epc_data[key]), None, epc_data[key])
+            epc_data[key] = np.where(epc_data[key] == "", None, epc_data[key])
+            estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
+            estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
+            estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
+            if vartype == "Int64":
+                # We have some edge cases where we get the error "invalid literal for int() with base 10: '1.0'"
+                # so this handles this
+                estimation_data[key] = estimation_data[key].astype(float).astype(vartype)
+            else:
+                estimation_data[key] = estimation_data[key].astype(vartype)
+
+            # If there is nothing left to estimate from, we leave the value empty
+            if estimation_data.shape[0] == 0:
+                estimated_epc[key] = None
+                continue
+
+            if vartype == "Int64":
+                estimated_value = self._estimate_int(estimation_data, key)
+            elif vartype == "float":
+                estimated_value = self._estimate_float(estimation_data, key)
+            elif vartype == "str":
+                estimated_value = self._estimate_str(estimation_data, key)
+            else:
+                raise NotImplementedError("estimation method not implemented for type")
+
+            estimated_epc[key] = estimated_value
+
+        # Insert an estimated lodgement datetime, with a weighted average
+        estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
+        # Extract lodgement date
+        estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
+
+        # These are set after the loop, so they overwrite any estimated values for the same keys
+        estimated_epc["postcode"] = self.postcode
+        estimated_epc["uprn"] = self.uprn
+        estimated_epc["address"] = self.full_address
+        # Indicate that this epc was estimated
+        estimated_epc["estimated"] = True
+
+        return estimated_epc
+
+    @staticmethod
+    def calculate_weighted_lodgement_datetime(epc_data):
+        """
+        Calculates the weighted average lodgement datetime for a set of EPC records
+        :param epc_data: pandas dataframe with a "lodgement-datetime" column and a numeric "weight" column
+        :return: the weighted mean of the lodgement datetimes, converted back to a datetime
+        """
+        # Represent each datetime as an integer, so that it can be averaged. We use astype rather than
+        # Series.view, because view is deprecated in recent pandas releases. Unlike view, astype will raise
+        # on a missing datetime instead of silently averaging in a sentinel value
+        numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).astype('int64')
+
+        # Calculate the weighted sum of dates
+        weighted_sum = (numeric_dates * epc_data['weight']).sum()
+
+        # Calculate the sum of weights
+        total_weights = epc_data['weight'].sum()
+
+        # Calculate the weighted mean in numeric format
+        weighted_mean_numeric = weighted_sum / total_weights
+
+        # Convert the numeric weighted mean back to datetime
+        weighted_mean_datetime = pd.to_datetime(weighted_mean_numeric)
+
+        return weighted_mean_datetime
+
+    @staticmethod
+    def _estimate_int(estimation_data, key):
+        """
+        Estimates an integer attribute as the weighted average of the nearby values, rounded to a whole number
+        :param estimation_data: pandas dataframe containing the column to estimate and a "weight" column
+        :param key: name of the column to estimate
+        """
+        return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]))
+
+    @staticmethod
+    def _estimate_float(estimation_data, key):
+        """
+        Estimates a float attribute as the weighted average of the nearby values, rounded to 2 decimal places
+        :param estimation_data: pandas dataframe containing the column to estimate and a "weight" column
+        :param key: name of the column to estimate
+        """
+        return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]), 2)
+
+    @staticmethod
+    def _estimate_str(estimation_data, key):
+        """
+        Estimates a string attribute by taking the value with the largest total weight
+        :param estimation_data: pandas dataframe containing the column to estimate, a "weight" column and
+                                a "lodgement-datetime" column (the latter is only used to break ties)
+        :param key: name of the column to estimate
+        :return: the value with the highest summed weight. If multiple values are tied, the one whose
+                 records have the most recent average lodgement datetime is returned
+        """
+        # Sum the weights for each distinct value and keep the value(s) with the largest total
+        agg = estimation_data.groupby(key)["weight"].sum().reset_index()
+        agg = agg[agg["weight"] == agg["weight"].max()]
+        if agg.shape[0] != 1:
+            # If we have multiple modes, we take the more recent data on average
+            recent_grouped = estimation_data[
+                estimation_data[key].isin(agg[key].values)
+            ].groupby(key)["lodgement-datetime"].mean()
+
+            newest_group = recent_grouped.idxmax()
+            return newest_group
+
+        return agg[key].values[0]
+
+    def find_property(self, skip_os=False):
+        """
+        This method will attempt to identify a property. It will, at first, use the EPC api to try and
+        find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to
+        find the UPRN of the address.
+
+        Because no result may have been provided by the EPC api because of formatting issues with the address,
+        if the ordnance survey api is used and the uprn retrieved, the EPC api is queried again with the UPRN, just
+        as a final check to see if there is any EPC data.
+
+        If there is no EPC data, the epc data will be estimated based on the surrounding properties
+
+        Nothing is returned - the results are stored on the instance, as newest_epc, older_epcs, full_sap_epc,
+        address_clean, postcode_clean and uprn.
+
+        :param skip_os: If True, the ordnance survey places api is not called. An estimate is only produced if
+                        the ordnance survey client already holds a property type
+        :raises Exception: if the ordnance survey api does not respond with a 200 status
+        """
+
+        # Step 1: use the epc api to find the property and uprn
+        response = self.get_epc()
+
+        if response["status"] == 200:
+            (
+                self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
+            ) = self.extract_epc_data(address=self.full_address)
+            return
+
+        # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn
+        if skip_os:
+            if self.ordnance_survey_client.property_type is not None:
+                # We can try and estimate
+                # NOTE(review): unlike step 4, this path does not update self.full_address before estimating -
+                # confirm that this is intended
+                estimated_epc = self.estimate_epc(
+                    property_type=self.ordnance_survey_client.property_type,
+                    built_form=self.ordnance_survey_client.built_form
+                )
+                self.newest_epc = estimated_epc
+                self.older_epcs = []
+                self.full_sap_epc = {}
+
+                # Finally, set a standardised address 1 and postcode
+                self.address_clean = self.ordnance_survey_client.address_os
+                self.postcode_clean = self.ordnance_survey_client.postcode_os
+            # NOTE(review): if we have no property type, we return here without setting any results
+            return
+
+        os_response = self.ordnance_survey_client.get_places_api()
+
+        if os_response["status"] != 200:
+            # Investigate this if it happens
+            raise Exception("Unable to find property - investigate me")
+
+        # Step 3: Now that we have a uprn, do another check against the epc api, this time searching with the uprn
+        # NOTE(review): get_epc is called without arguments, so presumably it picks up self.uprn - confirm
+        self.uprn = self.ordnance_survey_client.most_relevant_result["UPRN"]
+        response = self.get_epc()
+        if response["status"] == 200:
+            (
+                self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
+            ) = self.extract_epc_data()
+            return
+
+        # Step 4: If we still don't have an EPC, we estimate the EPC data
+        # The address from the ordnance survey replaces the one we were given, and is used for the estimated epc
+        self.full_address = self.ordnance_survey_client.most_relevant_result["ADDRESS"]
+        estimated_epc = self.estimate_epc(
+            property_type=self.ordnance_survey_client.property_type,
+            built_form=self.ordnance_survey_client.built_form
+        )
+        self.newest_epc = estimated_epc
+        self.older_epcs = []
+        self.full_sap_epc = {}
+
+        # Finally, set a standardised address 1 and postcode
+        self.address_clean = self.ordnance_survey_client.address_os
+        self.postcode_clean = self.ordnance_survey_client.postcode_os
+        return
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -13,6 +13,7 @@ class Settings(BaseSettings):
    HEAT_PREDICTIONS_BUCKET: str
    PLAN_TRIGGER_BUCKET: str
    EPC_AUTH_TOKEN: str
+    ORDNANCE_SURVEY_API_KEY: str
    DB_HOST: str
    DB_PASSWORD: str
    DB_USERNAME: str
--- a/backend/app/db/functions/property_functions.py
+++ b/backend/app/db/functions/property_functions.py
@ -11,7 +11,7 @@ from backend.app.db.models.portfolio import (
 from sqlalchemy.orm.exc import NoResultFound


-def create_property(session: Session, portfolio_id: int, address: str, postcode: str) -> (int, bool):
+def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str) -> (int, bool):
    """
    This function will create a record for the property in the database if it does not exist.
    If it does exist, it will just update the updated_at field.
@ -25,7 +25,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
    try:
        # Attempt to fetch the existing property
        existing_property = session.query(PropertyModel).filter_by(
-            address=address, postcode=postcode, portfolio_id=portfolio_id
+            uprn=uprn, portfolio_id=portfolio_id
        ).one()

        # Update the 'updated_at' field
@ -43,6 +43,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
            address=address,
            postcode=postcode,
            portfolio_id=portfolio_id,
+            uprn=uprn,
            creation_status=PropertyCreationStatus.LOADING,
            status=PortfolioStatus.ASSESSMENT.value,
            has_pre_condition_report=False,
--- a/backend/app/db/models/materials.py
+++ b/backend/app/db/models/materials.py
@ -19,7 +19,6 @@ class MaterialType(enum.Enum):
    flat_roof_insulation = "flat_roof_insulation"
    room_roof_insulation = "room_roof_insulation"
    windows_glazing = "windows_glazing"
-    

    iwi_wall_demolition = "iwi_wall_demolition"
    iwi_vapour_barrier = "iwi_vapour_barrier"
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@ -153,6 +153,7 @@ class PropertyDetailsEpcModel(Base):
    primary_energy_consumption = Column(Float)
    co2_emissions = Column(Float)
    adjusted_energy_consumption = Column(Float)
+    estimated = Column(Boolean, default=False)


 class PropertyDetailsSpatial(Base):
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -4,6 +4,7 @@ import numpy as np
 import pandas as pd
 from epc_api.client import EpcClient
 from etl.epc.Record import EPCRecord
+from backend.SearchEpc import SearchEpc
 from fastapi import APIRouter, Depends
 from sqlalchemy.exc import IntegrityError, OperationalError
 from sqlalchemy.orm import sessionmaker
@ -30,6 +31,8 @@ from backend.ml_models.api import ModelApi
 from backend.Property import Property
 from etl.epc.DataProcessor import EPCDataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
 from recommendations.optimiser.CostOptimiser import CostOptimiser
 from recommendations.optimiser.GainOptimiser import GainOptimiser
 from recommendations.optimiser.optimiser_functions import prepare_input_measures
@ -43,54 +46,6 @@ logger = setup_logger()

 BATCH_SIZE = 5

-class DummyDownloader:
-
-    def __init__(self, postcode, address1, id, epc_client):
-        self.id = id
-        self.postcode = postcode
-        self.address1 = address1
-
-        self.data = None
-        self.old_data = None
-
-        self.epc_client = epc_client
-    
-    def search_address_epc(self):
-        """
-        This method searches for an address in the EPC database and returns the first result
-        :return: property data
-        """
-        if self.data:
-            return
-
-        # This will fail if a property does not have an EPC - this has been documented as a case to handle
-        response = self.epc_client.domestic.search(params={"address": self.address1, "postcode": self.postcode})
-
-        # Check if we have a full sap EPC
-        self.full_sap_epc = [r for r in response["rows"] if r["transaction-type"] == "new dwelling"]
-        self.full_sap_epc = self.full_sap_epc[0] if self.full_sap_epc else self.full_sap_epc
-
-        if len(response["rows"]) > 1:
-            newest_response = [
-                r for r in response["rows"] if
-                r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in response["rows"]])
-            ]
-            if len(newest_response) > 1:
-                raise Exception("More than one result found for this address - investigate me")
-
-            # We'll keep old EPCs in case it contains information, not present on the newest one
-            self.old_data = [epc for epc in response["rows"] if epc["lmk-key"] != newest_response[0]["lmk-key"]]
-
-            response["rows"] = newest_response
-
-        self.data = response["rows"][0]
-        # For the moment, if we don't have a UPRN, we don't do anything about it, however we'll handle this in
-        # the future by using the Ordnance Survey places API
-        if not self.data["uprn"]:
-            logger.warning("We do not have a UPRN for this property")
-        else:
-            self.uprn = int(self.data["uprn"])
-
 router = APIRouter(
    prefix="/plan",
    tags=["plan"],
@ -103,37 +58,34 @@ router = APIRouter(
@router.post("/trigger")
 async def trigger_plan(body: PlanTriggerRequest):
    logger.info("Connecting to db")
-    # session = sessionmaker(bind=db_engine)()
+    session = sessionmaker(bind=db_engine)()
    created_at = datetime.now().isoformat()

    try:
        session.begin()
        logger.info("Getting the inputs")
-        Body = {'portfolio_id': '56', 'housing_type': 'Social', 'goal': 'Increase EPC', 'goal_value': 'A', 'trigger_file_path': '8/56/windows_portfolio_inputs.csv'}
-        body = PlanTriggerRequest(**Body)
        epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
-        uprn_filenames = read_dataframe_from_s3_parquet(
-            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
-        )
-        cleaning_data = read_parquet_from_s3(
-            bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
-        )

        input_properties = []

        for config in plan_input:
            # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
-            # TODO: implment validation. We should also standardise postcode and address in some fashion as
-            #       a postcode of abcdef would be considered different to ABCDEF
+
+            epc_searcher = SearchEpc(
+                address1=config["address"],
+                postcode=config["postcode"],
+                auth_token=get_settings().EPC_AUTH_TOKEN,
+                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
+            )
+            epc_searcher.find_property()
            # Create a record in db
            property_id, is_new = create_property(
-                session, portfolio_id=body.portfolio_id, address=config['address'], postcode=config['postcode']
+                session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
            )
            # if a new record was not created, we don't produduce recommendations
            if not is_new:
                continue
-            # TODO: Need to add heat demand target

            create_property_targets(
                session,
@ -143,29 +95,21 @@ async def trigger_plan(body: PlanTriggerRequest):
                heat_demand_target=None
            )

-            epc_downloader = DummyDownloader(id=0, epc_client=epc_client, postcode=config['postcode'], address1=config['address'])
-            epc_downloader.search_address_epc()
-
            epc_records ={
-                'original_epc': epc_downloader.data.copy(),
-                'full_sap_epc': epc_downloader.full_sap_epc.copy() if epc_downloader.full_sap_epc else [],
-                'old_data': epc_downloader.old_data.copy() if epc_downloader.old_data else []
+                'original_epc': epc_searcher.newest_epc,
+                'full_sap_epc': epc_searcher.full_sap_epc,
+                'old_data': epc_searcher.old_data,
            }

            prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) # This uses all the epc records to clean the data
-            
-            p = Property(
+          
+            input_properties.append(
+                Property(
                    id=property_id,
                    address1=config['address'],
                    postcode=config['postcode'],
                    epc_record=prepared_epc,
                )
-            
-            logger.info("Getting spatial data")
-
-            p.get_spatial_data(uprn_filenames)
-            input_properties.append(
-                p
            )


@ -180,10 +124,19 @@ async def trigger_plan(body: PlanTriggerRequest):
        materials = get_materials(session)
        cleaned = get_cleaned()

-        logger.info("Getting components and epc recommendations")
+        uprn_filenames = read_dataframe_from_s3_parquet(
+            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
+        )
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)

-        # TODO: Move this to a class. We probably want a Recommender class which takes the injects the optimisers
-        #      in as a dependency and then the optimisers can take the input measures in as part of the setup() method
+        logger.info("Getting spatial data")
+        for p in input_properties:
+            p.get_spatial_data(uprn_filenames)
+
+        logger.info("Getting components and epc recommendations")

        recommendations = {}
        recommendations_scoring_data = []
@ -192,7 +145,7 @@ async def trigger_plan(body: PlanTriggerRequest):
        for p in input_properties:

            # Property recommendations
-            p.get_components(cleaned)
+            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

            recommender = Recommendations(property_instance=p, materials=materials)
            property_recommendations = recommender.recommend()
--- a/backend/app/plan/utils.py
+++ b/backend/app/plan/utils.py
@ -175,11 +175,34 @@ def create_recommendation_scoring_data(
        scoring_dict["LOW_ENERGY_LIGHTING_ENDING"] = 100
        scoring_dict["LIGHTING_ENERGY_EFF_STARTING"] = "Very Good"

+    if recommendation["type"] == "windows_glazing":
+        scoring_dict["MULTI_GLAZE_PROPORTION_ENDING"] = 100
+        scoring_dict["WINDOWS_ENERGY_EFF_ENDING"] = "Average"
+
+        is_secondary_glazing = recommendation["is_secondary_glazing"]
+
+        if scoring_dict["glazing_type_ENDING"] == "multiple":
+            pass
+        elif scoring_dict["glazing_type_ENDING"] == "single":
+            scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "double"
+        elif scoring_dict["glazing_type_ENDING"] == "double":
+            scoring_dict["glazing_type_ENDING"] = "multiple" if is_secondary_glazing else "double"
+        elif scoring_dict["glazing_type_ENDING"] == "secondary":
+            scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "multiple"
+        elif scoring_dict["glazing_type_ENDING"] in ["triple", "high performance"]:
+            scoring_dict["glazing_type_ENDING"] = "multiple"
+        else:
+            raise ValueError("Invalid glazing type - implement me")
+
+    if recommendation["type"] == "solar_pv":
+        scoring_dict["PHOTO_SUPPLY_ENDING"] = recommendation["photo_supply"]
+
    if recommendation["type"] not in [
        "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
        "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
        "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
-        "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
+        "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
+        "windows_glazing", "solar_pv"
    ]:
        raise NotImplementedError("Implement me")

--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@ -121,19 +121,6 @@ def epc_to_sap_lower_bound(epc: str):
        raise ValueError("EPC rating should be between A and G")


-def read_parquet_from_s3(bucket_name, file_key):
-    client = boto3.client('s3')
-
-    # Get the object
-    s3_object = client.get_object(Bucket=bucket_name, Key=file_key)
-
-    # Read the CSV body into a DataFrame
-    csv_body = s3_object["Body"].read()
-    df = pd.read_parquet(BytesIO(csv_body))
-
-    return df
-
-
 def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
    """
    Save a pandas DataFrame to S3 as a Parquet file.
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@ -19,7 +19,9 @@ class PropertyValuation:
        100070505235: 344000,  # Based on Zoopla's estimation of 131 School road, which is also semi-detached
        100070513306: 182000,  # Based on Zoopla's estimation of 61 Simmons Drive
        100071306896: 77000,  # Based on Flat 2 of 44 Wedgewood Road on Zoopla
-        100021192109: 650000  # Based on Zoopla
+        100021192109: 650000,  # Based on Zoopla
+        766249482: 358000,  # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached
+        100120703802: 277000,  # Based on Zoopla
    }

    # We base our valuation uplifts on a number of sources
@ -93,7 +95,13 @@ class PropertyValuation:
        value = cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)

        if not value:
-            raise ValueError("Have not implemented valuation for this property")
+            return {
+                "current_value": None,
+                "lower_bound_increased_value": None,
+                "upper_bound_increased_value": None,
+                "average_increased_value": None,
+                "average_increase": None
+            }

        current_epc = property_instance.data["current-energy-rating"]
        # We get the spectrum of ratings between the current and target EPC
@ -119,4 +127,5 @@ class PropertyValuation:
            "lower_bound_increased_value": value * (1 + min_increase),
            "upper_bound_increased_value": value * (1 + max_increase),
            "average_increased_value": value * (1 + avg_increase),
+            "average_increase": value * (1 + avg_increase) - value
        }
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@ -2,8 +2,7 @@ import pandas as pd
 import requests
 from requests.exceptions import RequestException
 from utils.logger import setup_logger
-from utils.s3 import save_dataframe_to_s3_parquet
-from backend.app.utils import read_parquet_from_s3
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet

 logger = setup_logger()

@ -125,7 +124,7 @@ class ModelApi:

            # Retrieve the predictions
            predictions_df = pd.DataFrame(
-                read_parquet_from_s3(
+                read_dataframe_from_s3_parquet(
                    bucket_name=predictions_bucket,
                    file_key=response["storage_filepath"].split(predictions_bucket + "/")[1]
                )
--- a/backend/requirements/base.txt
+++ b/backend/requirements/base.txt
@ -35,4 +35,5 @@ mip==1.15.0
 boto3==1.28.3
 pandas==1.5.3
 pyarrow==12.0.1
-textblob
+textblob
+usaddress==0.5.10
--- a/backend/tests/test_property.py
+++ b/backend/tests/test_property.py
@ -9,6 +9,7 @@ from etl.epc_clean.EpcClean import EpcClean
 mock_epc_response = {
    "rows": [
        {
+            "tenure": "rental (social)",
            "lmk-key": 1,
            "uprn": 1,
            "number-habitable-rooms": 5,
@ -17,7 +18,7 @@ mock_epc_response = {
            "inspection-date": "2023-06-01",
            'lodgement-datetime': '2023-06-01 20:29:01',
            "some-other-key": "some-value",
-            "roof-description": "Roof Description",
+            "roof-description": "pitched, no insulation",
            "walls-description": "Walls Description",
            "windows-description": "Windows Description",
            "mainheat-description": "Main Heating Description",
@ -37,7 +38,8 @@ mock_epc_response = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        },
        {
            "lmk-key": 2,
@ -68,7 +70,8 @@ mock_epc_response = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        }
    ]
 }
@ -100,7 +103,8 @@ mock_epc_response_dupe = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        },
        {
            "lmk-key": 2,
@ -128,7 +132,8 @@ mock_epc_response_dupe = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        },
        {
            "lmk-key": 3,
@ -156,36 +161,62 @@ mock_epc_response_dupe = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
+            "floor-level": "Ground"
        }
    ]
 }


 class TestProperty:
+
    @pytest.fixture(autouse=True)
-    def property_instance(self, mock_epc_client, mock_cleaner):
-        property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client)
+    def mock_photo_supply_lookup(self):
+        """Build a one-row stand-in for the photo supply lookup table."""
+        # Insertion order of the keys fixes the column order of the resulting frame
+        lookup_row = {
+            "tenure": "rental (social)",
+            "built_form": "Detached",
+            "property_type": "House",
+            "construction_age_band": "England and Wales: 1967-1975",
+            "is_flat": False,
+            "is_pitched": True,
+            "is_roof_room": False,
+            "floor_area_decile": 2,
+            "photo_supply_median": 40,
+        }
+
+        return pd.DataFrame([lookup_row])
+
+    @pytest.fixture(autouse=True)
+    def mock_floor_area_decile_thresholds(self):
+        """Return a one-column frame of floor area decile thresholds for the tests."""
+        thresholds = [0, 10, 30, 50]
+        return pd.DataFrame({"floor_area_decile_thresholds": thresholds})
+
+    @pytest.fixture(autouse=True)
+    def property_instance(self, mock_cleaner):
+        property_instance = Property(id=1, postcode="AB12CD", address="Test Address", data=mock_epc_response["rows"][0])
        return property_instance

    @pytest.fixture(autouse=True)
-    def property_instance_dupe_data(self, mock_epc_client_dupe_data):
-        property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data)
+    def property_instance_dupe_data(self):
+        property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address")
        return property_instance_dupe_data

-    @pytest.fixture
-    def mock_epc_client(self):
-        mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
-        mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
-        mock_epc_client.auth_token = "mocked_auth_token"
-        return mock_epc_client
-
-    @pytest.fixture
-    def mock_epc_client_dupe_data(self):
-        mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
-        mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
-        mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
-        return mock_epc_client_dupe_data
+    # @pytest.fixture
+    # def mock_epc_client(self):
+    #     mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
+    #     mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
+    #     mock_epc_client.auth_token = "mocked_auth_token"
+    #     return mock_epc_client
+    #
+    # @pytest.fixture
+    # def mock_epc_client_dupe_data(self):
+    #     mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
+    #     mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
+    #     mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
+    #     return mock_epc_client_dupe_data

    @pytest.fixture
    def mock_cleaner(self):
@ -224,7 +255,11 @@ class TestProperty:
        }

        mock_cleaner.cleaned = {
-            "roof-description": [{"original_description": "Roof Description"}],
+            "roof-description": [
+                {"original_description": "Roof Description"},
+                {"original_description": "pitched, no insulation", "is_pitched": True, "is_flat": False,
+                 "is_roof_room": False}
+            ],
            "walls-description": [walls_data],
            "windows-description": [{"original_description": "Windows Description"}],
            "mainheat-description": [{"original_description": "Main Heating Description"}],
@ -235,37 +270,32 @@ class TestProperty:
        }
        return mock_cleaner

-    def test_init(self, mock_epc_client):
-        inst1 = Property(0, "AB12CD", "Test Address", epc_client=mock_epc_client)
-        # Should be mocked auth token
-        assert inst1.epc_client.auth_token == "mocked_auth_token"
+    def test_init(self):
+        inst1 = Property(0, postcode="AB12CD", address="Test Address")

-        inst2 = Property(3, "AB12CD", "Test Address", epc_client=mock_epc_client)
-        assert inst2.epc_client.auth_token
+        assert inst1.data is None

-        inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data"}, epc_client=mock_epc_client)
-        assert inst3.data == {"some": "data"}
+        inst2 = Property(3, "AB12CD", "Test Address")
+        assert inst2.id == 3

-        data = inst3.search_address_epc()
-        assert data is None
+        inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data", "uprn": 123})
+        assert inst3.data == {"some": "data", "uprn": 123}

-    def test_search_address_epc(self, property_instance):
-        # Call the method to test
-        property_instance.search_address_epc()
-
-        # Verify that the correct data is being returned
-        assert property_instance.data == mock_epc_response["rows"][0]
-
-    def test_search_address_epc_multiple_results(self, property_instance_dupe_data, mock_epc_client_dupe_data):
-        with pytest.raises(Exception, match="More than one result found for this address - investigate me"):
-            property_instance_dupe_data.search_address_epc()
-
-    def test_get_components(self, property_instance, mock_cleaner, mock_epc_client):
-        property_instance.search_address_epc()
-        property_instance.get_components(mock_cleaner.cleaned)
+    def test_get_components(
+        self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+    ):
+        property_instance.get_components(
+            mock_cleaner.cleaned,
+            photo_supply_lookup=mock_photo_supply_lookup,
+            floor_area_decile_thresholds=mock_floor_area_decile_thresholds
+        )

        # Verify that the components are set correctly
-        assert property_instance.roof == {"original_description": "Roof Description"}
+        assert property_instance.roof == {
+            'original_description': 'pitched, no insulation', 'is_pitched': True,
+            'is_flat': False, 'is_roof_room': False
+        }
+
        assert property_instance.walls == {
            "original_description": "Walls Description",
            "is_cavity_wall": True,
@ -289,24 +319,15 @@ class TestProperty:

        # Verify that ValueError is raised when EpcClean doesn't contain cleaned data
        with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"):
-            property_instance.get_components(mock_cleaner.cleaned)
+            property_instance.get_components(mock_cleaner.cleaned, pd.DataFrame(), pd.DataFrame())

-    def test_get_components_no_data(self, property_instance, mock_cleaner):
+    def test_get_components_no_attributes(
+        self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+    ):
        # Modify the mock cleaner to have no attributes for a specific description
        mock_cleaner.cleaned = {
            "roof-description": []
        }
-
-        # Verify that ValueError is raised when no attributes are found
-        with pytest.raises(ValueError, match="Property does not contain data"):
-            property_instance.get_components(mock_cleaner.cleaned)
-
-    def test_get_components_no_attributes(self, property_instance, mock_cleaner):
-        # Modify the mock cleaner to have no attributes for a specific description
-        mock_cleaner.cleaned = {
-            "roof-description": []
-        }
-        property_instance.search_address_epc()
        property_instance.data["roof-description"] = "Pitched, no insulation"
        property_instance.walls = {
            "original_description": "Walls Description",
@ -327,14 +348,17 @@ class TestProperty:
        }

        # Assert backup cleaning has been applied
-        property_instance.get_components(mock_cleaner.cleaned)
+        property_instance.get_components(
+            mock_cleaner.cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+        )

        assert property_instance.roof["clean_description"] == "Pitched, no insulation"
        assert property_instance.roof["is_pitched"]

-    def test_get_components_multiple_attributes(self, property_instance, mock_cleaner):
+    def test_get_components_multiple_attributes(
+        self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+    ):
        # This shouldn't happen - it would mean a cleaning error
-        property_instance.search_address_epc()
        property_instance.data["roof-description"] = "Roof Description"
        cleaned = {
            "roof-description": [
@ -345,10 +369,10 @@ class TestProperty:

        # Verify that ValueError is raised when multiple attributes are found
        with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"):
-            property_instance.get_components(cleaned)
+            property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds)

-    def test_set_spatial(self, mock_epc_client):
-        prop = Property(1, "AB12CD", "Test Address", mock_epc_client)
+    def test_set_spatial(self):
+        prop = Property(1, postcode="AB12CD", address="Test Address")

        spatial1 = pd.DataFrame([{
            'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@ -362,7 +386,7 @@ class TestProperty:
        assert prop.is_heritage
        assert prop.restricted_measures

-        prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop2 = Property(1, "AB12CD", "Test Address")

        spatial2 = pd.DataFrame([{
            'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@ -376,10 +400,10 @@ class TestProperty:
        assert not prop2.is_heritage
        assert not prop2.restricted_measures

-    def test_set_floor_level(self, mock_epc_client):
+    def test_set_floor_level(self):
        # In this case, we have a flat which looks looks it's on the first floor, but it's actually on the ground
        # floor, so we should set floor_level to 0
-        prop = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop = Property(1, postcode="AB12CD", address="Test Address")
        prop.data = {'floor-level': '01', 'property-type': 'Flat'}
        prop.floor = {
            'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation',
@ -395,7 +419,7 @@ class TestProperty:

        # This property is labelled as being on the ground floor but actually has another property below
        # so we set floor level to 1
-        prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop2 = Property(1, postcode="AB12CD", address="Test Address")
        prop2.data = {'floor-level': 'Ground', 'property-type': 'Flat'}
        prop2.floor = {
            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
@ -410,7 +434,7 @@ class TestProperty:
        assert prop2.floor_level == 1

        # this property is correctly labelled as being on the 2nd floor
-        prop3 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop3 = Property(1, postcode="AB12CD", address="Test Address")
        prop3.data = {'floor-level': '02', 'property-type': 'Flat'}
        prop3.floor = {
            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
@ -425,7 +449,7 @@ class TestProperty:
        assert prop3.floor_level == 2

        # Example of a house
-        prop4 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop4 = Property(1, postcode="AB12CD", address="Test Address")
        prop4.data = {'floor-level': '', 'property-type': 'House'}
        prop4.floor = {
            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
--- a/backend/tests/test_sap_model_prep.py
+++ b/backend/tests/test_sap_model_prep.py
@ -2,13 +2,11 @@ from backend.Property import Property
 from etl.epc.DataProcessor import DataProcessor
 from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
-from epc_api.client import EpcClient
 import pandas as pd
 import pytest
 import msgpack

 from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
-from tqdm import tqdm


 # Handy code for selecting testing data
@ -122,7 +120,21 @@ class TestSapModelPrep:
        cleaned = msgpack.unpackb(cleaned, raw=False)
        return cleaned

-    def test_fill_cavity_wall(self, cleaned, cleaning_data):
+    @pytest.fixture
+    def photo_supply_lookup(self):
+        """Read the photo supply lookup parquet from the retrofit-data-dev bucket."""
+        bucket, key = "retrofit-data-dev", "solar_pv_supply/photo_supply_lookup.parquet"
+        lookup_df = read_dataframe_from_s3_parquet(bucket_name=bucket, file_key=key)
+        return lookup_df
+
+    @pytest.fixture
+    def floor_area_decile_thresholds(self):
+        """Read the floor area decile thresholds parquet from the retrofit-data-dev bucket."""
+        bucket, key = "retrofit-data-dev", "solar_pv_supply/floor_area_decile_thresholds.parquet"
+        thresholds_df = read_dataframe_from_s3_parquet(bucket_name=bucket, file_key=key)
+        return thresholds_df
+
+    def test_fill_cavity_wall(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
        """
        We ensure that the process that prepares the data in the engine code results in the same data as
        the model is trained on
@ -288,11 +300,10 @@ class TestSapModelPrep:
        home = Property(
            id=0,
            postcode=starting_epc["postcode"],
-            address1=starting_epc["address1"],
-            epc_client=EpcClient(auth_token="notoken"),
+            address=starting_epc["address1"],
            data=starting_epc
        )
-        home.get_components(cleaned)
+        home.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

        data_processor = DataProcessor(None, newdata=True)
        data_processor.insert_data(pd.DataFrame([home.get_model_data()]))
@ -356,7 +367,7 @@ class TestSapModelPrep:

            assert test_record[c].values[0] == row[c]

-    def test_internal_wall_insulation(self, cleaned, cleaning_data):
+    def test_internal_wall_insulation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):

        starting_epc2 = {
            'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY',
@ -508,11 +519,10 @@ class TestSapModelPrep:
        home2 = Property(
            id=0,
            postcode=starting_epc2["postcode"],
-            address1=starting_epc2["address1"],
-            epc_client=EpcClient(auth_token="notoken"),
+            address=starting_epc2["address1"],
            data=starting_epc2
        )
-        home2.get_components(cleaned)
+        home2.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
        home2.set_number_lighting_outlets(None)

        data_processor2 = DataProcessor(None, newdata=True)
@ -578,7 +588,7 @@ class TestSapModelPrep:

            assert test_record2[c].values[0] == row2[c]

-    def test_ventilation(self, cleaned, cleaning_data):
+    def test_ventilation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):

        starting_epc3 = {
            'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor',
@ -728,11 +738,10 @@ class TestSapModelPrep:
        home3 = Property(
            id=0,
            postcode=starting_epc3["postcode"],
-            address1=starting_epc3["address1"],
-            epc_client=EpcClient(auth_token="notoken"),
+            address=starting_epc3["address1"],
            data=starting_epc3
        )
-        home3.get_components(cleaned)
+        home3.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
        home3.set_number_lighting_outlets(None)

        data_processor3 = DataProcessor(None, newdata=True)
@ -782,7 +791,7 @@ class TestSapModelPrep:

            assert test_record3[c].values[0] == row3[c]

-    def test_fireplaces(self, cleaned, cleaning_data):
+    def test_fireplaces(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):

        starting_epc4 = {
            'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill',
@ -937,11 +946,10 @@ class TestSapModelPrep:
        home4 = Property(
            id=0,
            postcode=starting_epc4["postcode"],
-            address1=starting_epc4["address1"],
-            epc_client=EpcClient(auth_token="notoken"),
+            address=starting_epc4["address1"],
            data=starting_epc4
        )
-        home4.get_components(cleaned)
+        home4.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
        home4.set_number_lighting_outlets(None)

        data_processor4 = DataProcessor(None, newdata=True)
--- a/etl/costs/app.py
+++ b/etl/costs/app.py
@ -75,6 +75,7 @@ def app():
    ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0)
    lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
    flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
+    window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)

    # Form a single table to be uploaded
    costs = pd.concat(
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@ -33,6 +33,7 @@ class Eligibility:

    # If the loft has less than 100mm of insulation, we classify the home has needing loft insulation
    LOFT_INSULATION_THRESHOLD = 100
+    HIGH_LOFT_INSULATION_THRESHOLD = 269

    # Because EPCS have different values for tenure, we need to remap them to a common set of values
    tenure_remap = {
@ -104,6 +105,8 @@ class Eligibility:
            self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
        )

+        high_loft_thickness_threshold = self.HIGH_LOFT_INSULATION_THRESHOLD
+
        # We firstly check if the roof is a loft
        is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])

@ -122,7 +125,22 @@ class Eligibility:
            is_flat=self.roof["is_flat"]
        )

-        if insulation_thickness > loft_thickness_threshold:
+        if insulation_thickness <= loft_thickness_threshold:
+            self.loft = {
+                "suitability": True,
+                "thickness": insulation_thickness,
+                "reason": None
+            }
+        # A thin loft must keep reason=None, so the higher threshold is only tested when the first check fails
+        elif insulation_thickness <= high_loft_thickness_threshold:
+            self.loft = {
+                "suitability": True,
+                "thickness": insulation_thickness,
+                "reason": "high loft thickness but below regulation"
+            }
+            return
+
+        if insulation_thickness > high_loft_thickness_threshold:
            # Insulation is already thick enough
            self.loft = {
                "suitability": False,
@ -131,12 +149,6 @@ class Eligibility:
            }
            return

-        self.loft = {
-            "suitability": True,
-            "thickness": insulation_thickness,
-            "reason": None
-        }
-
    def cavity_insulation(self):

        """
@ -152,9 +164,25 @@ class Eligibility:
        is_partial_filled = (
            self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"]
        )
+        # We look for potentially under performing cavities - anything that is assumed, as built and insulated
+        is_underperforming = (
+            self.walls["is_as_built"] and self.walls["insulation_thickness"] in ["average"] and self.walls["is_assumed"]
+        )

        is_unfilled_cavity = is_cavity and is_empty
        is_partial_filled_cavity = is_cavity and is_partial_filled
+        is_underperforming_cavity = is_cavity and is_underperforming
+
+        # Check if it has internal or external wall insulation
+        has_internal_wall_insulation = self.walls["internal_insulation"]
+        has_external_wall_insulation = self.walls["external_insulation"]
+
+        if has_internal_wall_insulation or has_external_wall_insulation:
+            self.cavity = {
+                "suitability": False,
+                "type": "internal or external wall insulation"
+            }
+            return

        if is_unfilled_cavity:
            self.cavity = {
@ -170,6 +198,13 @@ class Eligibility:
            }
            return

+        if is_underperforming_cavity:
+            self.cavity = {
+                "suitability": True,
+                "type": "underperforming"
+            }
+            return
+
        self.cavity = {
            "suitability": False,
            "type": "full"
@ -223,6 +258,14 @@ class Eligibility:
        }

    def suspended_floor_insulation(self):
+
+        if "no_data" in self.floor.keys():
+            if self.floor["no_data"]:
+                self.suspended_floor = {
+                    "suitability": False,
+                }
+                return
+
        is_suspended = self.floor["is_suspended"]
        is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]

@ -232,6 +275,14 @@ class Eligibility:
        return

    def solid_floor_insulation(self):
+
+        if "no_data" in self.floor.keys():
+            if self.floor["no_data"]:
+                self.solid_floor = {
+                    "suitability": False,
+                }
+                return
+
        is_solid = self.floor["is_solid"]
        is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]

@ -305,7 +356,8 @@ class Eligibility:
        """

        current_sap = int(self.epc["current-energy-efficiency"])
-        if current_sap > 54:
+
+        if current_sap >= 69:
            self.eco4_warmfront = {
                "eligible": False,
                "message": "sap too high"
@ -319,9 +371,22 @@ class Eligibility:
        is_eligible = self.cavity["suitability"] & self.loft["suitability"]

        if post_retrofit_sap is None:
+
+            if current_sap >= 55:
+                message = "Possibly eligible but property currently EPC D"
+            else:
+                message = "subject to post retrofit sap" if is_eligible else "not eligible"
+
+            # Update the message to flag properties that failed just because of a full cavity.
+            # We need to double check that the wall is a cavity, that the loft is suitable and that the
+            # sap is within reason
+            # We can then estimate the age of the cavity fill
+            if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
+                message = "Failed due to full cavity - check cavity age"
+
            self.eco4_warmfront = {
                "eligible": is_eligible,
-                "message": "subject to post retrofit sap"
+                "message": message
            }
            return

--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@ -11,13 +11,12 @@ import numpy as np
 import msgpack
 from datetime import datetime, timedelta
 from utils.logger import setup_logger
-from utils.s3 import read_from_s3
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
 from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc
 from backend.Property import Property
 from etl.eligibility.Eligibility import Eligibility
 from etl.epc.DataProcessor import DataProcessor
-from backend.app.utils import read_parquet_from_s3
 from backend.app.plan.utils import create_recommendation_scoring_data
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
@ -247,6 +246,8 @@ def merge_ha_15(asset_list, identified_addresses):

        identified_addresses = identified_addresses.drop_duplicates("merge_key")

+    # We pull out raw counts for the survey lists
+
    # Check asset list for dupes
    asset_list_dupes = asset_list["merge_key"].duplicated()
    if asset_list_dupes.sum():
@ -336,7 +337,10 @@ def merge_ha_15(asset_list, identified_addresses):
    return merged_data, dropped_identified_merge_keys


-def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, created_at):
+def prepare_model_data_row(
+    property_id, modelling_epc, cleaned, cleaning_data, created_at,
+    photo_supply_lookup, floor_area_decile_thresholds, old_data=None, full_sap_epc=None,
+):
    """
    This function prepares the data for modelling, in the same fashion as the recommendation engine
    With up-coming refactoring, this will change
@ -346,15 +350,24 @@ def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, c
    p = Property(
        id=property_id,
        postcode=modelling_epc["postcode"],
-        address1=modelling_epc["address1"],
-        epc_client=None,
-        data=modelling_epc
+        address=modelling_epc["address1"],
+        data=modelling_epc,
+        old_data=old_data,
+        full_sap_epc=full_sap_epc
    )

-    p.get_components(cleaned)
+    p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup,
+                     floor_area_decile_thresholds=floor_area_decile_thresholds)
+
+    # THIS IS TEMP AND SHOULDN'T BE HERE
+    data_to_clean = p.get_model_data()
+    if data_to_clean["NUMBER_HEATED_ROOMS"] in ['', None]:
+        data_to_clean["NUMBER_HEATED_ROOMS"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
+        p.data["number-heated-rooms"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
+
    # This is temp - this should happen after scoring
    cleaned_property_data = DataProcessor.apply_averages_cleaning(
-        data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
+        data_to_clean=pd.DataFrame([dict(**data_to_clean, LOCAL_AUTHORITY=p.data["local-authority"])]),
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
    )
@ -829,6 +842,18 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
        results_df["warmfront_identified"]
    ]

+    # Aggregate number of ECO4 and GBIS jobs identified
+    n_eco = results_df["eco4_eligible"].sum()
+    # Gbis is rows where eco4 is not eligible
+    n_gbis = results_df[
+        (results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
+        ]["gbis_eligible"].sum()
+
+    pipeline_potential = results_df[
+        (results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
+            results_df["gbis_eligible"] == True)
+        ]
+
    success_rate = warmfront_identified["gbis_eligible"].sum() / warmfront_identified.shape[0]
    # For HA32, this is 89%

@ -886,8 +911,16 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):

    new_possibilities = results_df[
        (~results_df["warmfront_identified"]) &
-        (results_df["gbis_eligible"] | results_df["eco4_eligible"]) &
-        (results_df["tenure"] == "Rented (social)")
+        (results_df["gbis_eligible"] | results_df["eco4_eligible"])
+        ].copy()
+
+    new_possibilities_eco = results_df[
+        (~results_df["warmfront_identified"]) &
+        (results_df["eco4_eligible"] == True)
+        ].copy()
+    new_possibilities_gbis = results_df[
+        (~results_df["warmfront_identified"]) &
+        (results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
        ].copy()

    future_possibilities_eco = results_df[
@ -947,6 +980,8 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
        results_df["warmfront_identified"]
    ]

+    warmfront_identified = warmfront_identified
+
    n_identified = (warmfront_identified["gbis_eligible"] | warmfront_identified["eco4_eligible"]).sum()

    success_rate = n_identified / warmfront_identified.shape[0]
@ -955,6 +990,11 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
        "eligibility_classification"].value_counts()
    # For HA15 this is 50.3%

+    pipeline_potential = results_df[
+        (results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
+            results_df["gbis_eligible"] == True)
+        ]
+
    # of the properties we identify, what is the mix of confidenc

    missed = results_df[
@ -973,32 +1013,37 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
        missed["sap"] < 69
        ]

-    sap_low_enough["walls"].value_counts()
-    z = ha15[ha15["row_id"].isin(sap_too_high["row_id"].values)]
-
-    investigate_1 = ha15[ha15["row_id"].isin(sap_too_high["row_id"])][
-        ["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
-
-    investigate_2 = ha15[ha15["row_id"].isin(sap_low_enough["row_id"])][
-        ["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
-
-    missed["message"].value_counts()
+    # Aggregate counts of the ECO4 and GBIS jobs identified
+    n_eco = results_df["eco4_eligible"].sum()
+    # Gbis is rows where eco4 is not eligible
+    n_gbis = results_df[
+        (results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
+        ]["gbis_eligible"].sum()

    # We now look for properties that we identified, that were not identified by Warmfront

    new_possibilities = results_df[
        (~results_df["warmfront_identified"]) &
-        ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) &
-        (results_df["tenure"] == "Rented (social)")
+        ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True))
+        ].copy()
+
+    new_possibilities_eco = results_df[
+        (~results_df["warmfront_identified"]) &
+        (results_df["eco4_eligible"] == True)
+        ].copy()
+
+    new_possibilities_gbis = results_df[
+        (~results_df["warmfront_identified"]) &
+        (results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
        ].copy()

    # These are future possibilityies
-    new_possibilities_eco = results_df[
+    future_possibilities_eco = results_df[
        (~results_df["warmfront_identified"]) &
        (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
        ].copy()

-    new_possibilities_gbis = results_df[
+    future_possibilities_gbis = results_df[
        (~results_df["warmfront_identified"]) &
        (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
            ~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
@ -1058,7 +1103,7 @@ def app():
    )
    cleaned = msgpack.unpackb(cleaned, raw=False)

-    cleaning_data = read_parquet_from_s3(
+    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
    )

--- a/etl/eligibility/ha_15_32/ha16_app.py
+++ b/etl/eligibility/ha_15_32/ha16_app.py
@ -0,0 +1,647 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+import re
+
+# NOTE(review): the diff header places this module in etl/eligibility/ha_15_32/ already, so this
+# path resolves to a nested etl/eligibility/ha_15_32/ folder beneath it - confirm that is intended
+ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+
+logger = setup_logger()
+
+# The .env file must be loaded before any setting is read from the environment, otherwise a token
+# that is only defined in the .env file would be read as None
+load_dotenv(ENV_FILE)
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+# Ordered (misspelt, corrected) fixes applied to the lower-cased survey street names.
+# The order is significant: each fix is applied to the output of the previous one, e.g. the specific
+# "..., fitton hill" entries must run before the generic ", fitton hill" removal.
+_STREET_NAME_CORRECTIONS = (
+    ("eccels", "eccles"),
+    ("chatley, road", "chatley road"),
+    ("vaughen", "Vaughan"),
+    ("cresent", "crescent"),
+    ("plantation road", "plantation avenue"),
+    ("how clough drive", "howclough drive"),
+    ("brockhurst lane", "brookhurst lane"),
+    ("biirch road", "birch road"),
+    ("hadson road", "hodson road"),
+    ("harbonne avennue", "narbonne avenue"),
+    ("cumberland road, cadishead", "cumberland avenue, cadishead"),
+    ("aston field drive", "ashton field drive"),
+    ("wedgewood road", "wedgwood road"),
+    ("hamilton close", "hamilton avenue"),
+    ("lichens crescent, fitton hill", "lichens crescent"),
+    ("south croft, fitton hill", "south croft"),
+    (", fitton hill", ""),
+    ("firtree dr", "fir tree avenue"),
+    ("hawthorne road", "hawthorn crescent"),
+    ("rein lee avenue", "reins lee avenue"),
+    ("westerhill road", "wester hill road"),
+    ("st martins road", "saint martins road"),
+    ("timperley avenue", "timperley close"),
+    ("eastwood road", "eastwood avenue"),
+    ("new road", "new street"),
+    ("grassmere road", "grasmere road"),
+    ("hulton road", "hulton avenue"),
+    ("beechfield avenue", "beechfield road"),
+    ("princess avenue", "princes avenue"),
+    ("edge ford crecent", "edge fold crescent"),
+    ("conniston avenue", "coniston avenue"),
+    ("blackthorne crescent", "blackthorn crescent"),
+    ("wellstock road", "wellstock lane"),
+    ("brackley avenue", "brackley street"),
+    ("brook avenue swinton", "brook avenue, swinton"),
+    ("green avenue swinton", "green avenue, swinton"),
+    ("grasmere avenue wardley", "grasmere avenue, wardley"),
+    ("mardale avenue wardle", "mardale avenue, wardle"),
+    ("carleach grove", "cartleach Grove"),
+    ("arbour grove", "arbor Grove"),
+)
+
+
+def _read_sheet_rows(sheet):
+    """
+    Read every row below the header of a worksheet, together with the fill colour of its first cell.
+    :param sheet: An openpyxl worksheet whose first row holds the column headers
+    :return: A tuple (rows, colours). rows is a list of lists of cell values; colours holds, per row,
+        the fill colour index of the first cell, or None where that index is '00000000'
+    """
+    rows = []
+    colours = []
+    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        fill_index = row[0].fill.start_color.index
+        rows.append([cell.value for cell in row])
+        colours.append(fill_index if fill_index != '00000000' else None)
+    return rows, colours
+
+
+def load_data():
+    """
+    Load the HA16 asset list (spread across two workbooks) and the ECO4 survey list, tidy the survey
+    street names and postcodes, and match every survey row to exactly one asset list row.
+    :return: A tuple (data, survey_list). data is the asset list left-joined to the matched survey
+        rows, with warmfront_identified set to True for matched rows and False otherwise.
+        survey_list is the cleaned survey list, including the generated survey_key column
+    :raises ValueError: If a survey row cannot be matched to exactly one asset list row
+    """
+    # This asset list is spread across two sheets, which we need to combine
+    asset_list_filenames = [
+        "HESTIA - HA 16 ASSET LIST PART 1 OF 2.xlsx",
+        "HESTIA - HA 16 ASSET LIST PART 2 OF 2.xlsx",
+    ]
+
+    # Prepare lists to collect rows data and their colors
+    rows_data = []
+    rows_colors = []
+    colnames = []
+    for asset_list_filename in asset_list_filenames:
+        workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/{asset_list_filename}')
+        sheet = workbook.active
+        colnames.append([cell.value for cell in sheet[1]])
+
+        sheet_rows, sheet_colors = _read_sheet_rows(sheet)
+        rows_data.extend(sheet_rows)
+        rows_colors.extend(sheet_colors)
+
+    # The header of the first workbook is used for the rows of both workbooks
+    asset_list = pd.DataFrame(rows_data, columns=colnames[0])
+    # Remove None columns
+    asset_list = asset_list.iloc[:, 0:12]
+    asset_list['row_color'] = rows_colors
+
+    # Any colour that is not the red or green index (including no fill at all) is labelled yellow
+    asset_list["row_colour_name"] = np.where(
+        asset_list["row_color"] == "FFFF0000", "red",
+        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
+    )
+
+    # Split up the address on commas, which is useful for matching later
+    split_addresses = asset_list['Address'].str.split(',', expand=True)
+    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5']
+
+    asset_list = pd.concat([asset_list, split_addresses], axis=1)
+    # There is no commas separating house number and address 1
+    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
+    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
+    # We could re-concatenate but we only care about HouseNo for the moment
+    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)
+
+    # We now read in the survey list
+    survey_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
+    survey_sheet = survey_workbook.active
+    survey_rows, survey_colors = _read_sheet_rows(survey_sheet)
+
+    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+
+    # The survey list has a column called "INSTALLED OR CANCELLED" which describes the status of the
+    # property, so the colours are not needed for that; they are still kept in row_colour
+    survey_list["row_colour"] = survey_colors
+    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]
+    # Tidy up the street/block name a bit
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+    # Replace the standalone word "rd" with "road" (this also covers "reeds rd")
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True)
+
+    # Replace " , " with ", "
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
+        " , ", ', ',
+    )
+    # Fix "{place} ,{place}" with "{place}, {place}"
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ', regex=True)
+    # Strip whitespace
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
+
+    # Correct errors
+    survey_list["Post Code"] = np.where(
+        survey_list["Post Code"] == "M38 0SA",
+        "M38 9SA",
+        survey_list["Post Code"]
+    )
+
+    survey_list["Post Code"] = np.where(
+        (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
+        "M44 5JF",
+        survey_list["Post Code"]
+    )
+
+    # Apply the street name corrections in order, as plain (non-regex) substring replacements
+    for misspelt, corrected in _STREET_NAME_CORRECTIONS:
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            misspelt, corrected, regex=False
+        )
+
+    # Replacement for clively avenue 66-68
+    survey_list["NO."] = np.where(
+        survey_list["NO."] == "66-68",
+        "66",
+        survey_list["NO."]
+    )
+
+    # We now need to merge the survey list onto the asset list
+    # Could be easier just to do a search on each row, even though it's much slower
+    asset_address_lower = asset_list["Address"].str.lower()
+    matched = []
+    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
+
+        house_number = row["NO."]
+        if isinstance(house_number, str):
+            house_number = house_number.lower()
+        house_number = str(house_number)
+
+        # Filter on the first line of the address. The values are free text, so they are matched as
+        # literal substrings rather than being interpreted as regular expressions
+        street_name = row["Street / Block Name"].lower()
+        df = asset_list[asset_address_lower.str.contains(street_name, regex=False)].copy()
+        df = df[df["Address"].str.lower().str.contains(house_number, regex=False)]
+        if df.shape[0] != 1:
+            # The survey number was lower-cased above, so compare against a lower-cased HouseNo
+            df = df[df["HouseNo"].str.lower() == house_number]
+            if df.shape[0] != 1:
+                df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower(), regex=False)]
+                if df.shape[0] != 1:
+                    raise ValueError(
+                        f"Investigate: survey row {row['survey_key']} "
+                        f"({row['NO.']} {row['Street / Block Name']}, {row['Post Code']}) "
+                        f"matched {df.shape[0]} asset list rows instead of exactly 1"
+                    )
+
+        matched.append(
+            {
+                "survey_key": row["survey_key"],
+                "matched_address": df["Address"].values[0],
+                "survey_house_no": row["NO."],
+                "survey_street_name": row["Street / Block Name"],
+                "survey_postcode": row["Post Code"],
+                "survey_status": row["INSTALLED OR CANCELLED"]
+            }
+        )
+
+    matched = pd.DataFrame(matched)
+    matched["warmfront_identified"] = True
+
+    # Combine asset list and surveys
+    data = asset_list.merge(
+        matched, how="left", left_on="Address", right_on="matched_address",
+    )
+    # Asset list rows without a matching survey row were not identified
+    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
+
+    return data, survey_list
+
+
+def _classify_eco4_confidence(current_sap, post_install_sap):
+    """
+    Band the predicted post-install SAP score against the target the property has to reach.
+    :param current_sap: The current SAP score of the property
+    :param post_install_sap: The predicted SAP score once the measures are installed
+    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
+    """
+    # Properties with a SAP of 38 or below are assessed against a lower target than the rest
+    # NOTE(review): 38 / 55 / 69 presumably correspond to the top of band F and the bottom of
+    # bands D and C respectively - confirm against the scheme rules
+    target = 55 if current_sap <= 38 else 69
+
+    if post_install_sap >= target + 2:
+        return "highest confidence"
+    if post_install_sap >= target:
+        return "high confidence"
+    if post_install_sap >= target - 2:
+        return "medium confidence"
+    # A missing prediction (NaN) fails every comparison above and so also lands here
+    return "unlikely"
+
+
+def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    """
+    Search for the EPC of every property, run the eligibility checks against it, score the ECO4
+    eligible properties with the models and classify how likely they are to reach their SAP target.
+    :param data: Dataframe of properties; the columns HouseNo, Postcode, Address, Type and row_id are read
+    :param cleaned: Passed through to Eligibility, calculate_cavity_age and prepare_model_data_row
+    :param cleaning_data: Passed through to prepare_model_data_row and the DataProcessor cleaning steps
+    :param created_at: Passed to prepare_model_data_row and used as the ModelApi timestamp
+    :param photo_supply_lookup: Passed through to prepare_model_data_row
+    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
+    :return: A tuple (results_df, scoring_data, nodata). results_df has one row per property with an
+        EPC, including sap_uplift, post_install_sap and eligibility_classification. scoring_data is
+        the list of model input rows. nodata is the list of property rows for which no EPC was found
+    """
+    scoring_data = []
+    results = []
+    nodata = []
+
+    # Maps the asset list's property type onto the property type / built form used for the EPC search
+    property_type_lookup = {
+        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
+        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
+        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
+        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
+        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Detached House': {"property-type": "House", "built-form": "Detached"},
+        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
+        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
+    }
+
+    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+
+        searcher = SearchEpc(
+            address1=property_meta["HouseNo"],
+            postcode=property_meta["Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            full_address=property_meta["Address"]
+        )
+        # The ordnance survey lookup is skipped, so the property type and built form are set by hand
+        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Type"]]["property-type"]
+        searcher.ordnance_survey_client.built_form = property_type_lookup[property_meta["Type"]]["built-form"]
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta)
+            continue
+
+        if searcher.newest_epc.get("estimated"):
+            # We insert the row ID as our proxy for UPRN
+            proxy_uprn = int(property_meta["row_id"].split("_")[1])
+            searcher.newest_epc["uprn"] = proxy_uprn
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+        # We also want to get the penultimate epc
+        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        if not penultimate_epc:
+            penultimate_epc = newest_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        # If the newest EPC is eligible for neither scheme, the checks are re-run on the penultimate EPC
+        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+            eligibility.check_gbis_warmfront()
+            eligibility.check_eco4_warmfront()
+            # If this is the case, we need to update the older epcs
+            # We don't update just to make data cleaning easier
+            if penultimate_epc.get("estimated") is None:
+                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+            eligibility.walls["is_cavity_wall"] and
+            eligibility.walls["is_filled_cavity"] and
+            eligibility.loft["suitability"] and
+            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+        # Full checks
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        # Only the ECO4 eligible properties are scored by the models
+        if eligibility.eco4_warmfront["eligible"]:
+            if eligibility.epc["uprn"] == "":
+                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+
+            scoring_dictionary = prepare_model_data_row(
+                property_id=property_meta["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        results.append(
+            {
+                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
+                "Address": property_meta["Address"],
+                "Postcode": property_meta["Postcode"],
+                "property_type": eligibility.epc["property-type"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "cavity_type": eligibility.cavity["type"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                "loft_thickness": eligibility.roof["insulation_thickness"],
+                "cavity_age": cavity_age,
+                # Keys in these dictionaries override any identically named key set above
+                **eligibility.walls,
+                **eligibility.roof,
+            }
+        )
+
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+    # NOTE(review): the portfolio id says "ha33" although this script processes HA16 - confirm
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    # The uplift of every prediction row is summed to give a single uplift per property
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    # The upgrade requirements are dependent on the current SAP, see _classify_eco4_confidence
+    eco4_eligible_rows = results_df[results_df["eco4_eligible"] == True]
+    eligibility_assessment = pd.DataFrame(
+        [
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
+            }
+            for _, row in eco4_eligible_rows.iterrows()
+        ],
+        # Explicit columns keep the merge below working when no property is ECO4 eligible
+        columns=["row_id", "eligibility_classification"],
+    )
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+    return results_df, scoring_data, nodata
+
+
+def analyse_results(results_df, data, survey_list):
+    analysis_data = data[["row_id", "survey_key", "warmfront_identified", "row_colour_name"]].merge(
+        results_df, how="left", on="row_id"
+    ).merge(
+        survey_list[["survey_key", survey_list.columns[0]]].rename(columns={survey_list.columns[0]: "funding_scheme"}),
+        how="left", on="survey_key"
+    )
+
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    warmfront_sold_eco4 = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
+        ]  # 1407
+
+    warmfront_sold_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
+        ]
+
+    ideal_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    secondary_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            analysis_data["roof_insulation_thickness_numeric"] > 100)
+        ]
+
+    # underperforming cavities
+    underperforming_cavities = analysis_data[
+        (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
+            analysis_data["cavity_age"] > 10 * 365
+        ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    identified_gbis_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            analysis_data["eco4_eligible"] == False
+        )
+        ]
+
+    eco_eligible = analysis_data[analysis_data["eco4_eligible"] == True]
+    eco_ineligible = analysis_data[analysis_data["eco4_eligible"] == False]
+
+    eco_ineligible["eco4_message"].value_counts()
+
+    # SAP too high:
+    sap_too_high = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"].copy()
+    further_possibilities = sap_too_high[
+        sap_too_high["walls"].isin(
+            [
+                "Cavity wall, as built, insulated",
+                "Cavity wall, as built, no insulation",
+                "Cavity wall, as built, partial insulation",
+                "Cavity wall, no insulation",
+                "Cavity wall, partial insulation"
+            ]
+        )
+    ]
+
+    filled_cavities = eco_ineligible[
+        eco_ineligible["eco4_message"] == "sap too high"
+        ]
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
+    warmfront_identified["walls"].value_counts()
+
+    all_identified_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
+            ["ECO4 GBIS (ECO+)"])) |
+        (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None]))
+        ]
+
+    empty_cavity_desriptions = [
+        "Cavity wall, as built, no insulation", "Cavity wall, as built, partial insulation",
+        "Cavity wall, no insulation", "Cavity wall, partial insulation"
+    ]
+
+    empty_cavities = analysis_data[analysis_data["walls"].isin(empty_cavity_desriptions)]
+    remaining_empty = empty_cavities[~empty_cavities["warmfront_identified"]]
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
+
+    # Of the ECO jobs, what proportion to we get right
+    warmfront_identified_eco = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])
+    ]
+
+    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]
+
+    warmfront_identified_gbis = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])
+    ]
+
+    gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]
+
+    # Additional identified
+    additional_identified_eco = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False)
+        ]
+
+    additional_identified_eco["eligibility_classification"].value_counts()
+
+    additional_identified_gbis = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) & (
+            analysis_data["warmfront_identified"] == False
+        )
+        ].shape[0]
+    # Future
+    additional_identified_eco_future = analysis_data[
+        (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False)
+        ].shape[0]
+    additional_identified_gbis_future = analysis_data[
+        (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & (
+            analysis_data["warmfront_identified"] == False
+        )
+        ].shape[0]
+
+
+def app():
+    data, survey_list = load_data()
+
+    data["row_id"] = ["ha16_" + str(i) for i in range(0, len(data))]
+
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    created_at = datetime.now().isoformat()
+
+    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+    results_df, scoring_data, nodata = get_epc_data(
+        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
+    )
+
+    # Store
+    # Old file was ha16.pickle
+    # import pickle
+    # with open("ha16_10_jan.pickle", "wb") as f:
+    #     pickle.dump(
+    #         {
+    #             "scoring_data": scoring_data,
+    #             "results": results_df,
+    #             "nodata": nodata
+    #         }, f
+    #     )
+
+    # Read pickle
+    # import pickle
+    # with open("ha16_10_jan.pickle", "rb") as f:
+    #     saved = pickle.load(f)
+    # scoring_data = saved["scoring_data"]
+    # results_df = saved["results"]
+    # nodata = saved["nodata"]
--- a/etl/eligibility/ha_15_32/ha24_app.py
+++ b/etl/eligibility/ha_15_32/ha24_app.py
@ -0,0 +1,524 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+
+logger = setup_logger()
+load_dotenv(ENV_FILE)
+
+
+def load_data():
+    workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 24 ASSET LIST.xlsx')
+    sheet = workbook.active
+    sheet_colnames = [cell.value for cell in sheet[1]]
+
+    rows_data = []
+    rows_colors = []
+    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        rows_data.append(row_data)
+        rows_colors.append(row_color)
+
+    asset_list = pd.DataFrame(rows_data, columns=sheet_colnames)
+    # Remove None columns
+    asset_list = asset_list.iloc[:, 0:10]
+    asset_list['row_color'] = rows_colors
+
+    asset_list["row_colour_name"] = np.where(
+        asset_list["row_color"] == "FFFF0000", "red",
+        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
+    )
+
+    asset_list["row_colour_code"] = np.where(
+        asset_list["row_colour_name"] == "red", "does not meet criteria",
+        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
+    )
+
+    # The third column is listed as "Address" but it's actually the postcode". We have two Address columns so we
+    # change just the third
+    asset_list.columns.values[2] = "Postcode"
+
+    # Split up the address on commas, which is useful for matching later
+    split_addresses = asset_list['Address'].str.split(',', expand=True)
+    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5', 'address6']
+
+    asset_list = pd.concat([asset_list, split_addresses], axis=1)
+    # There is no commas separating house number and address 1
+    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
+    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
+    # We could re-concatenate but we only care about HouseNo for the moment
+    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)
+
+    # Read in surveys
+    survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
+    survey_sheet = survey_workbook.active
+
+    survey_rows = []
+    survey_colors = []
+
+    for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        survey_rows.append(row_data)
+        survey_colors.append(row_color)
+
+    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+
+    survey_list["row_colour"] = survey_colors
+    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]
+    # Tidy up the street/block name a bit
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.strip()
+
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "council house, nidds lane", "nidds lane"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "wirral avenue", "wirrall avenue"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "st ives road", "st. ives crescent"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "sundringham road", "sandringham road"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "milton avenue", "milton road"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "st ives crescent", "st. ives crescent"
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "council house, waterbelly lane", "waterbelly lane"
+    )
+    # Generally remove "councile house, " from the start of the street name
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "council house, ", ""
+    )
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+        "st. leodegars close", "st leodegars close"
+    )
+
+    # asset_list[asset_list["Address"].str.lower().str.contains("wirral")]["Address"]
+
+    # Drop all None rows
+    survey_list = survey_list[~pd.isnull(survey_list["Street / Block Name"])]
+    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]
+
+    matched = []
+    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
+        house_number = row["NO."]
+        if isinstance(house_number, str):
+            house_number = house_number.lower()
+
+        # Filter on the first line of the address
+        df = asset_list[asset_list["Address"].str.lower().str.contains(row["Street / Block Name"].lower())].copy()
+        # df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
+        df = df[df["Address"].str.lower().str.contains(str(house_number))]
+        if df.shape[0] != 1:
+            df = df[df["HouseNo"] == str(house_number)]
+            if df.shape[0] != 1:
+                df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
+                if df.shape[0] != 1:
+                    print(row["Street / Block Name"])
+                    print(house_number)
+                    print(row["Post Code"].lower())
+                    raise ValueError("Investigate")
+
+        matched.append(
+            {
+                "survey_key": row["survey_key"],
+                "matched_address": df["Address"].values[0],
+                "survey_house_no": row["NO."],
+                "survey_street_name": row["Street / Block Name"],
+                "survey_postcode": row["Post Code"],
+                "survey_status": row["INSTALLED OR CANCELLED"]
+            }
+        )
+
+    matched = pd.DataFrame(matched)
+    matched["warmfront_identified"] = True
+
+    # Combine asset list and surveys
+    data = asset_list.merge(
+        matched, how="left", left_on="Address", right_on="matched_address",
+    )
+    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
+
+    return data, survey_list
+
+
+def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    scoring_data = []
+    results = []
+    nodata = []
+
+    property_type_lookup = {
+        "01 HOUSE": "House",
+        "02 FLAT": "Flat",
+        "03 BUNGALOW": "Bungalow",
+        "05 BEDSIT": "Flat",
+        "04 MAISONETTE": "Maisonette",
+        "01 HOUSE MID": "House",
+        "10 PBUNGALOW": "Bungalow",
+        "14 SFLAT": "Flat",
+        "12 SBEDSIT": "Flat",
+        "11 PFLAT": "Flat",
+        "13 SBUNGALOW": "Bungalow",
+        " 01 HOUSE MID": "House",
+        "09 PBEDSIT": "Flat"
+    }
+
+    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+
+        searcher = SearchEpc(
+            address1=property_meta["HouseNo"],
+            postcode=property_meta["Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            full_address=property_meta["Address"]
+        )
+        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Property Type"]]
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta)
+            continue
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+        # We also want to get the penultimate epc
+        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        if not penultimate_epc:
+            penultimate_epc = newest_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+            eligibility.check_gbis_warmfront()
+            eligibility.check_eco4_warmfront()
+            # If this is the case, we need to update the older epcs
+            # older_epcs = [
+            #     x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]]
+            # ]
+            # If this is the case, we need to update the older epcs
+            # We don't update just to make data cleaning easier
+            if penultimate_epc.get("estimated") is None:
+                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+            eligibility.walls["is_cavity_wall"] and
+            eligibility.walls["is_filled_cavity"] and
+            eligibility.loft["suitability"] and
+            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+        # Full checks
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        if eligibility.eco4_warmfront["eligible"]:
+            if eligibility.epc["uprn"] in ["", None]:
+                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+
+            scoring_dictionary = prepare_model_data_row(
+                property_id=property_meta["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        results.append(
+            {
+                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
+                "Address": property_meta["Address"],
+                "Postcode": property_meta["Postcode"],
+                "property_type": eligibility.epc["property-type"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "cavity_type": eligibility.cavity["type"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                "cavity_age": cavity_age,
+                **eligibility.walls,
+                **eligibility.roof,
+            }
+        )
+
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+    model_api = ModelApi(portfolio_id="ha24-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+
+        # If the property is an F or G, it only needs to upgrade to an %
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+    return results_df, scoring_data, nodata
+
+
+def analyse_results(results_df, data, survey_list):
+    analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge(
+        results_df, how="left", on="row_id"
+    ).merge(
+        survey_list[["survey_key", survey_list.columns[0]]].rename(columns={survey_list.columns[0]: "funding_scheme"}),
+        how="left", on="survey_key"
+    )
+
+    # NEW
+
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    warmfront_sold_eco4 = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
+        ]
+
+    warmfront_sold_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
+        ]
+    # 1407
+
+    additional_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    additional_gbis_warmfront_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            ~analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values)
+        )
+        ]
+
+    additional_gbis_warmfront_not_sold["walls"].value_counts()
+    analysis_data["walls"].value_counts()
+
+    # END NEW
+
+    all_identified_eco = analysis_data[
+        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
+            ["ECO4 A/W"])) |
+        (analysis_data["eco4_eligible"])
+        ]
+
+    all_identified_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
+            ["ECO4 GBIS (ECO+)"])) |
+        (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None]))
+        ]
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
+
+    # Of the ECO jobs, what proportion to we get right
+    warmfront_identified_eco = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])
+    ]
+
+    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]
+
+    warmfront_identified_gbis = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])
+    ]
+
+    # No gbis for this
+    # gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]
+
+    # Additional identified
+    additional_identified_eco = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False)
+        ]
+
+    additional_identified_eco["eligibility_classification"].value_counts()
+
+    additional_identified_gbis = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) & (
+            analysis_data["warmfront_identified"] == False
+        )
+        ].shape[0]
+    # Future
+    additional_identified_eco_future = analysis_data[
+        (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False)
+        ].shape[0]
+    additional_identified_gbis_future = analysis_data[
+        (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & (
+            analysis_data["warmfront_identified"] == False
+        )
+        ].shape[0]
+
+
+def app():
+    data, survey_list = load_data()
+
+    data["row_id"] = ["ha24_" + str(i) for i in range(0, len(data))]
+
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    created_at = datetime.now().isoformat()
+
+    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+    results_df, scoring_data, nodata = get_epc_data(
+        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
+    )
+
+    # Pickle results just in case
+    # import pickle
+    # with open("ha24_10_jan.pickle", "wb") as f:
+    #     pickle.dump(
+    #         {
+    #             "scoring_data": scoring_data,
+    #             "results": results_df,
+    #             "nodata": nodata
+    #         }, f
+    #     )
+
+    # Read in pickle
+    # import pickle
+    # with open("ha24_10_jan.pickle", "rb") as f:
+    #     saved = pickle.load(f)
+    # scoring_data = saved["scoring_data"]
+    # results_df = saved["results"]
+    # nodata = saved["nodata"]
--- a/etl/eligibility/ha_15_32/ha25_app.py
+++ b/etl/eligibility/ha_15_32/ha25_app.py
@ -0,0 +1,883 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+import re
+
+# NOTE(review): this module already lives in etl/eligibility/ha_15_32/, so this path resolves to
+# <module dir>/etl/eligibility/ha_15_32/.env. That looks unintended (load_dotenv does nothing when
+# the file is missing) - confirm whether it should simply be Path(__file__).parent / ".env".
+ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+
+logger = setup_logger()
+
+# Load the .env file BEFORE reading anything from the environment; otherwise a token that is only
+# defined in the .env file would be read as None.
+load_dotenv(ENV_FILE)
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def load_data():
+    """
+    Load the asset list and the ECO4 prospect survey list from local Excel workbooks and match
+    every survey row to a row of the asset list.
+
+    Three workbooks are read with openpyxl (paths are relative to the working directory): the
+    asset list, the historical ECO3 survey list ("CAVITY" sheet) and the adhoc ECO4 / CWI prospect
+    list ("LiveWest" sheet). The ECO3 list is only parsed and colour-coded here; it is not part
+    of the return value.
+
+    :return: A tuple of three DataFrames:
+        - the asset list left-joined with the matched survey rows, including a True/False
+          "warmfront_identified" column
+        - the cleaned ECO4 prospect survey list
+        - the survey rows whose "survey_key" could not be matched to an asset
+    """
+    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx', data_only=True)
+    sheet = workbook.active
+
+    rows_data = []
+    rows_colors = []
+    for row in sheet.iter_rows(min_row=1, values_only=True):  # values_only=True yields plain cell values
+
+        row_data = list(row)  # Each row arrives as a tuple of values; keep it as a mutable list
+        rows_data.append(row_data)
+
+    # Headers are on the final row. Pop them off and store them and then remove them from rows_data
+    headers = rows_data.pop()
+    # The postcode header is None, so we replace it with "postcode"
+    headers[-1] = "postcode"
+
+    # Handle colours separately - a second pass with values_only=False is needed to reach the cell fill
+    for row in sheet.iter_rows(min_row=1, values_only=False):
+        # Assume first cell color is indicative of entire row
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        rows_colors.append(row_color)
+
+    # Remove the final row of colours, which is the header
+    rows_colors.pop()
+
+    asset_list = pd.DataFrame(rows_data, columns=headers)
+    asset_list['row_color'] = rows_colors
+
+    # Translate the fill colour code into a name; anything that is not red or green (including no
+    # fill at all) is treated as yellow
+    asset_list["row_colour_name"] = np.where(
+        asset_list["row_color"] == "FFFF0000", "red",
+        np.where(asset_list["row_color"] == "FF00B050", "green", "yellow")
+    )
+
+    # Translate the colour name into its meaning
+    asset_list["row_colour_code"] = np.where(
+        asset_list["row_colour_name"] == "red", "does not meet criteria",
+        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
+    )
+
+    # Normalised address used for searching: lower case, every occurrence of "flat" removed, trimmed
+    asset_list["address"] = asset_list["T1_Address"].copy().str.lower()
+    asset_list["address"] = asset_list["address"].str.replace("flat", "")
+    asset_list["address"] = asset_list["address"].str.strip()
+
+    # Split the address on single spaces; the first token is taken to be the house number.
+    # Exactly 14 column names are assigned, so this raises if the split yields a different
+    # number of columns.
+    split_addresses = asset_list['address'].str.split(' ', expand=True)
+    split_addresses.columns = ['HouseNo', 'address2', 'address3', 'address4', 'address5', 'address6', 'address7',
+                               'address8',
+                               'address9', 'address10', 'address11', 'address12', 'address13', 'address14', ]
+    split_addresses["HouseNo"] = split_addresses["HouseNo"].str.replace(";", "")
+
+    # We could re-concatenate but we only care about HouseNo for the moment
+    asset_list = pd.concat([asset_list, split_addresses[["HouseNo"]]], axis=1)
+    asset_list["postcode"] = asset_list["postcode"].str.strip()
+
+    # We analyse the historical ECO3 survey list
+    eco3_survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx')
+    eco3_survey_sheet = eco3_survey_workbook["CAVITY"]
+
+    eco3_survey_rows = []
+    eco3_survey_colors = []
+
+    for row in eco3_survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        eco3_survey_rows.append(row_data)
+        eco3_survey_colors.append(row_color)
+
+    # Some adhoc analysis on the eco3 survey list, just to get completion and cancellation rates historically
+    eco3_survey_list = pd.DataFrame(eco3_survey_rows, columns=[cell.value for cell in eco3_survey_sheet[1]])
+    eco3_survey_list["row_colour"] = eco3_survey_colors
+    # Remove rows where street name is missing
+    eco3_survey_list = eco3_survey_list[~pd.isnull(eco3_survey_list["Street / Block Name"])]
+    # We need to parse the row colours
+    # We have the following mappings:
+    # FF7030A0: purple
+    # FF92D050: green
+    # FFFF0000: red
+    # FFFFFF00: yellow
+    # FF38FD23: green
+    eco3_survey_list["row_colour_name"] = np.where(
+        eco3_survey_list["row_colour"] == "FF7030A0", "purple",
+        np.where(eco3_survey_list["row_colour"] == "FF92D050", "green",
+                 np.where(eco3_survey_list["row_colour"] == "FFFF0000", "red",
+                          np.where(eco3_survey_list["row_colour"] == "FFFFFF00", "yellow",
+                                   np.where(eco3_survey_list["row_colour"] == "FF38FD23", "green", "unknown")
+                                   )
+                          )
+                 )
+    )
+
+    # We map the meaning:
+    # red: cancelled
+    # green: installed advised install complete
+    # purple: installer advised install complete + post works EPC
+    # yellow: filler row - drop
+    eco3_survey_list["row_colour_code"] = np.where(
+        eco3_survey_list["row_colour_name"] == "red", "cancelled",
+        np.where(eco3_survey_list["row_colour_name"] == "green", "installed advised install complete",
+                 np.where(eco3_survey_list["row_colour_name"] == "purple",
+                          "installer advised install complete + post works EPC",
+                          np.where(eco3_survey_list["row_colour_name"] == "yellow", "filler row - drop", "unknown")
+                          )
+                 )
+    )
+
+    # This is good enough for the indicative cancellation rates
+
+    # We now read in the indicative survey list which identified prospects for ECO4 works
+    eco4_survey_workbook = openpyxl.load_workbook(
+        f'etl/eligibility/ha_15_32/HESTIA - HA 25 ADHOC ISOLATED IDENTIFIED PROPERTIES FOR CWI.xlsx'
+    )
+    eco4_prospect_survey_sheet = eco4_survey_workbook["LiveWest"]
+
+    eco4_prospects_survey_rows = []
+    eco4_prospects_survey_colors = []
+
+    for row in eco4_prospect_survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        # row_color = COLOR_INDEX[row_color]
+        eco4_prospects_survey_rows.append(row_data)
+        eco4_prospects_survey_colors.append(row_color)
+
+    # Build the ECO4 prospect survey DataFrame, using the first sheet row as the column names
+    eco4_prospects_survey_list = pd.DataFrame(
+        eco4_prospects_survey_rows, columns=[cell.value for cell in eco4_prospect_survey_sheet[1]]
+    )
+    eco4_prospects_survey_list["row_colour"] = eco4_prospects_survey_colors
+
+    # Normalise the street name: lower case and trimmed
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.lower()
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.strip()
+
+    # Drop rows without a street name, then give each remaining row a unique survey key.
+    # The index is not reset, so the original row labels are kept.
+    eco4_prospects_survey_list = eco4_prospects_survey_list[~pd.isnull(eco4_prospects_survey_list["ADDRESS 1"])]
+    eco4_prospects_survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(eco4_prospects_survey_list))]
+
+    # Correct some errors in the survey list
+    eco4_prospects_survey_list["POSTCODE"] = np.where(
+        (eco4_prospects_survey_list["ADDRESS 1"] == "berry park") &
+        (eco4_prospects_survey_list["POSTCODE"] == "PL12 6HP"),
+        "PL12 6EN",
+        eco4_prospects_survey_list["POSTCODE"]
+    )
+
+    # Remove semi colons from address in asset and survey list
+    asset_list["T1_Address"] = asset_list["T1_Address"].str.replace(";", "")
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(";", "")
+
+    # In the prospects survey list, we have 6 WALKHAM MEADOWS listed twice, which should be 6a and 6b
+    # (838 and 839 are index labels of the survey DataFrame, not positions)
+    eco4_prospects_survey_list.loc[838, "NO"] = "6a"
+    eco4_prospects_survey_list.loc[839, "NO"] = "6b"
+
+    # 3, 7, 9 BOLDVENTURE ROAD should be BOLDVENTURE CLOSE
+    eco4_prospects_survey_list["ADDRESS 1"] = np.where(
+        (eco4_prospects_survey_list["ADDRESS 1"] == "boldventure road") &
+        (eco4_prospects_survey_list["NO"].isin([3, 7, 9])),
+        "boldventure close",
+        eco4_prospects_survey_list["ADDRESS 1"]
+    )
+
+    # "old farm road" with postcode PL5 1EP is renamed to "old school road"
+    eco4_prospects_survey_list["ADDRESS 1"] = np.where(
+        (eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") & (
+            eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"),
+        "old school road",
+        eco4_prospects_survey_list["ADDRESS 1"]
+    )
+
+    # 52 "croft orchard" with postcode TQ12 6RP is renamed to "drum way"
+    eco4_prospects_survey_list["ADDRESS 1"] = np.where(
+        (eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") & (
+            eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") & (
+            eco4_prospects_survey_list["NO"] == 52),
+        "drum way",
+        eco4_prospects_survey_list["ADDRESS 1"]
+    )
+
+    # String replace
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
+        "the gulls, collaton road", "the gulls collaton road"
+    )
+    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
+        "crows-an-eglose", "crows-an-eglos"
+    )
+
+    # We have a high volume of rows that do not match
+    # Each survey row ends up in exactly one of these two lists
+    matched = []
+    nomatch = []
+    for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)):
+
+        # Not in the asset list
+        if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN":
+            nomatch.append(row.to_dict())
+            continue
+
+        # Not in the asset list
+        if (row["ADDRESS 1"] == "roberts road") and row["NO"] == 23 and row["POSTCODE"] == "PL5 1DP":
+            nomatch.append(row.to_dict())
+            continue
+
+        # Not in the asset list
+        if row["ADDRESS 1"] in [
+            "kaynton mead", "broadmoor lane", "hoopers barton", "ecos court", "selwood road",
+            "castle street"
+        ]:
+            nomatch.append(row.to_dict())
+            continue
+
+        # House numbers may be ints or strings; for strings such as "flat 2" keep only the part
+        # after "flat"
+        house_number = row["NO"]
+        if isinstance(house_number, str):
+            house_number = house_number.lower()
+
+            if "flat" in house_number:
+                house_number = house_number.split("flat")[1].strip()
+
+        # Filter on the first line of the address
+        # NOTE(review): str.contains treats its argument as a regular expression by default, so a
+        # street name or house number containing regex metacharacters is matched as a pattern
+        df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy()
+        # Narrowing cascade - each step only runs while we do not have exactly one candidate:
+        # 1) address contains the house number, 2) HouseNo equals the house number,
+        # 3) postcode contains the survey postcode. Anything still ambiguous is a non-match.
+        if house_number is not None:
+            if df.shape[0] != 1:
+                df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
+        if df.shape[0] != 1:
+            if house_number is not None:
+                df = df[df["HouseNo"] == str(house_number)]
+            if df.shape[0] != 1:
+                if row["POSTCODE"] is not None:
+                    df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())]
+                if df.shape[0] != 1:
+                    nomatch.append(row.to_dict())
+                    continue
+
+        # Exactly one asset row is left at this point
+        matched.append(
+            {
+                "survey_key": row["survey_key"],
+                "matched_address": df["T1_Address"].values[0],
+                "survey_house_no": row["NO"],
+                "survey_street_name": row["ADDRESS 1"],
+                "survey_postcode": row["POSTCODE"],
+            }
+        )
+
+    nomatch = pd.DataFrame(nomatch)
+    matched = pd.DataFrame(matched)
+
+    matched["warmfront_identified"] = True
+
+    # Combine asset list and surveys
+    data = asset_list.merge(
+        matched, how="left", left_on="T1_Address", right_on="matched_address",
+    )
+    # Assets without a matched survey row have no value after the left join; mark them as False
+    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
+
+    # Survey rows that were identified as prospects but could not be tied to an asset
+    lost_identified_properties = eco4_prospects_survey_list[
+        ~eco4_prospects_survey_list["survey_key"].isin(matched["survey_key"])
+    ]
+
+    return data, eco4_prospects_survey_list, lost_identified_properties
+
+
+def map_year_to_age_band(year):
+    """
+    Map a build year onto the "England and Wales: ..." construction age band label.
+
+    :param year: The build year; anything accepted by int() (e.g. 1975, 1975.0, "1975")
+    :return: The age band label, or "Invalid Year" when the value cannot be converted to an
+        integer (e.g. None, NaN, infinity or non-numeric text)
+    """
+    try:
+        year = int(year)
+    except (TypeError, ValueError, OverflowError):
+        # TypeError covers None (an empty spreadsheet cell), ValueError covers NaN and
+        # non-numeric text, OverflowError covers infinity
+        return "Invalid Year"  # Or any other way you want to handle invalid inputs
+
+    if year < 1900:
+        return "England and Wales: before 1900"
+
+    # (last year of the band, band label) in ascending order; the first band whose last year is
+    # not exceeded is the one we want
+    band_upper_bounds = (
+        (1929, "1900-1929"),
+        (1949, "1930-1949"),
+        (1966, "1950-1966"),
+        (1975, "1967-1975"),
+        (1982, "1976-1982"),
+        (1990, "1983-1990"),
+        (1995, "1991-1995"),
+        (2002, "1996-2002"),
+        (2006, "2003-2006"),
+        (2011, "2007-2011"),
+    )
+    for last_year, label in band_upper_bounds:
+        if year <= last_year:
+            return f"England and Wales: {label}"
+
+    # All remaining years are 2012 onwards
+    return "England and Wales: 2012 onwards"
+
+
+def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    """
+    Look up the EPC for every property, run the eligibility checks, score the ECO4 eligible
+    properties with the prediction models and classify how likely each is to reach the required
+    post-install SAP score.
+
+    :param data: DataFrame with one row per property. The columns read here are "HouseNo",
+        "postcode", "address", "T1_AssetType", "T1_Address", "row_id" and "Build Yr"
+    :param cleaned: Cleaned EPC artefact, passed to Eligibility, calculate_cavity_age and
+        prepare_model_data_row
+    :param cleaning_data: Cleaning dataset, passed to prepare_model_data_row and the
+        DataProcessor cleaning steps
+    :param created_at: Timestamp of the run, passed to prepare_model_data_row and ModelApi
+    :param photo_supply_lookup: Passed through to prepare_model_data_row
+    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
+    :return: A tuple of
+        - results_df: one row per property with an EPC, including the eligibility flags,
+          "sap_uplift", "post_install_sap" and "eligibility_classification"
+        - scoring_data: the raw (uncleaned) list of model input rows
+        - nodata: the rows of ``data`` for which no EPC was found
+    """
+    scoring_data = []
+    results = []
+    nodata = []
+
+    # Maps the asset type recorded in the asset list onto EPC property type / built form.
+    # Keys are matched exactly, which is why misspelt variants and a trailing space appear here.
+    property_type_lookup = {
+        "Flat": {"property-type": "Flat", "built-form": None},
+        "Mid Terrace House": {"property-type": "House", "built-form": "Mid-Terrace"},
+        "End Terrace House": {"property-type": "House", "built-form": "End-Terrace"},
+        "Maisonnette": {"property-type": "Flat", "built-form": None},
+        "Semi Detached House": {"property-type": "House", "built-form": "Semi-Detached"},
+        "Detached House": {"property-type": "House", "built-form": "Detached"},
+        "Coach House": {"property-type": "House", "built-form": "Detached"},
+        "Bungalow": {"property-type": "Bungalow", "built-form": None},
+        "Detached Bungalow": {"property-type": "Bungalow", "built-form": "Detached"},
+        "House": {"property-type": "House", "built-form": None},
+        "Semi Detached Bung": {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+        "Bedspace": {"property-type": None, "built-form": None},
+        "Office Buildings": {"property-type": None, "built-form": None},
+        "End Terrace Bungalow": {"property-type": "Bungalow", "built-form": "End-Terrace"},
+        "Mid Terrace Bungalow": {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+        "Bedsit": {"property-type": "Flat", "built-form": None},
+        "Mid Terrace Housekeeping": {"property-type": "House", "built-form": "Mid-Terrace"},
+        "Mid Terrace Housekeeping ": {"property-type": "House", "built-form": "Mid-Terrace"},
+        "End Terrace Housex": {"property-type": "House", "built-form": "End-Terrace"},
+        "Guest Room": {"property-type": None, "built-form": None}
+    }
+
+    # iterrows() yields (index, row) pairs; iterating the DataFrame itself would yield its
+    # column labels instead of its rows
+    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+
+        searcher = SearchEpc(
+            address1=property_meta["HouseNo"],
+            postcode=property_meta["postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            full_address=property_meta["address"]
+        )
+        # The Ordnance Survey lookup is skipped, so seed its client with the property type and
+        # built form taken from the asset list
+        asset_type = property_type_lookup[property_meta["T1_AssetType"]]
+        searcher.ordnance_survey_client.property_type = asset_type["property-type"]
+        searcher.ordnance_survey_client.built_form = asset_type["built-form"]
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta)
+            continue
+
+        if searcher.newest_epc.get("estimated"):
+            # We insert the row ID as our proxy for UPRN
+            proxy_uprn = int(property_meta["row_id"].split("_")[1])
+            searcher.newest_epc["uprn"] = proxy_uprn
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+        # We also want to get the penultimate epc
+        # penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        # if not penultimate_epc:
+        #     penultimate_epc = newest_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        # if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+        #     eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+        #     eligibility.check_gbis_warmfront()
+        #     eligibility.check_eco4_warmfront()
+        #     # If this is the case, we need to update the older epcs
+        #     # We don't update just to make data cleaning easier
+        #     if penultimate_epc.get("estimated") is None:
+        #         older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+            eligibility.walls["is_cavity_wall"] and
+            eligibility.walls["is_filled_cavity"] and
+            eligibility.loft["suitability"] and
+            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+        # Full checks
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        # Only ECO4 eligible properties are scored by the models
+        if eligibility.eco4_warmfront["eligible"]:
+            # Fill the gaps in the EPC that the model inputs cannot cope with
+            if eligibility.epc["uprn"] in ["", None]:
+                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+
+            if eligibility.epc["construction-age-band"] in ["", None]:
+                eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])
+
+            # This is not the right place to do this but this is temp
+            if eligibility.epc["extension-count"] in ["", None]:
+                eligibility.epc["extension-count"] = 0
+
+            # Not in the right place but temp
+            if eligibility.epc["built-form"] in ["", None]:
+                if not older_epcs:
+                    eligibility.epc["built-form"] = "Mid-Terrace"
+
+            scoring_dictionary = prepare_model_data_row(
+                property_id=property_meta["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds,
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        results.append(
+            {
+                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
+                "Address": property_meta["T1_Address"],
+                "Postcode": property_meta["postcode"],
+                "property_type": eligibility.epc["property-type"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "cavity_type": eligibility.cavity["type"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                "cavity_age": cavity_age,
+                # Spread last: a key these dictionaries share with the entries above overrides them
+                **eligibility.walls,
+                **eligibility.roof,
+            }
+        )
+
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    # Each prediction row holds a predicted SAP score; subtract the current SAP to get the uplift
+    # and add the uplifts up per property
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+
+        # A current SAP of 38 or below has a lower post-install target (55) than the rest (69);
+        # presumably these are the band D / band C boundaries - TODO confirm. The confidence
+        # tiers sit at target + 2, target and target - 2.
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    # Name the columns explicitly so the merge below still has a "row_id" key when no property
+    # was ECO4 eligible and the list is empty
+    eligibility_assessment = pd.DataFrame(
+        eligibility_assessment, columns=["row_id", "eligibility_classification"]
+    )
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+    return results_df, scoring_data, nodata
+
+
+def get_epc_data_for_lost_surveys(
+    lost_identified_properties, cleaned, cleaning_data, created_at, photo_supply_lookup,
+    floor_area_decile_thresholds
+):
+    lost_identified_properties["row_id"] = [
+        "lost_surveys_ha25_" + str(i) for i in range(0, len(lost_identified_properties))
+    ]
+
+    scoring_data = []
+    results = []
+    nodata = []
+
+    property_type_lookup = {
+        "MID-TERRACE": {"property-type": "House", "built-form": "Mid-Terrace"},
+        "N/A": {"property-type": "House", "built-form": None},
+        "END-TERRACE": {"property-type": "House", "built-form": "End-Terrace"},
+        "GROUND-FLOOR": {"property-type": "House", "built-form": None},
+        "TOP-FLOOR": {"property-type": "House", "built-form": None},
+        "SEMI-DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
+        "MID-FLOOR": {"property-type": "House", "built-form": None},
+        "TOP-FLOOR FLAT": {"property-type": "House", "built-form": None},
+        "DETACHED": {"property-type": "House", "built-form": "Detached"},
+        "MID-FLOOR FLAT": {"property-type": "House", "built-form": None},
+        "SEMI- DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
+        "NO EPC ON GOV": {"property-type": "House", "built-form": None},
+        "Top-floor flat": {"property-type": "House", "built-form": None},
+        "GROUND-FLOOR FLAT": {"property-type": "House", "built-form": None},
+        "NOT ON GOV SITE": {"property-type": "House", "built-form": None}
+    }
+
+    for _, property_meta in tqdm(lost_identified_properties.iterrows(), total=len(lost_identified_properties)):
+
+        if property_meta["POSTCODE"] is None:
+            continue
+
+        full_address = ", ".join(
+            [str(x) for x in [
+                property_meta["NO"], property_meta["ADDRESS 1"], property_meta["ADDRESS 2"], property_meta["ADDRESS 3"]
+            ] if x is not None]
+        )
+
+        searcher = SearchEpc(
+            address1=str(property_meta["NO"]),
+            postcode=property_meta["POSTCODE"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            full_address=full_address
+        )
+
+        property_type_key = property_meta["PROPERTY TYPE"]
+        if property_type_key is not None:
+            searcher.ordnance_survey_client.property_type = property_type_lookup[property_type_key.strip()][
+                "property-type"]
+            searcher.ordnance_survey_client.built_form = property_type_lookup[property_type_key.strip()][
+                "built-form"]
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta)
+            continue
+
+        if searcher.newest_epc.get("estimated"):
+            # We insert the row ID as our proxy for UPRN
+            proxy_uprn = int(property_meta["row_id"].split("_")[-1])
+            searcher.newest_epc["uprn"] = proxy_uprn
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+        # We also want to get the penultimate epc
+        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        if not penultimate_epc:
+            penultimate_epc = newest_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+            eligibility.check_gbis_warmfront()
+            eligibility.check_eco4_warmfront()
+            # If this is the case, we need to update the older epcs
+            # We don't update just to make data cleaning easier
+            if penultimate_epc.get("estimated") is None:
+                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # Full checks
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        if eligibility.eco4_warmfront["eligible"] & (eligibility.epc["construction-age-band"] not in ["", None]):
+            if eligibility.epc["uprn"] in ["", None]:
+                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+
+            scoring_dictionary = prepare_model_data_row(
+                property_id=property_meta["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds,
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        results.append(
+            {
+                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
+                "Address": property_meta["ADDRESS 1"],
+                "Postcode": property_meta["POSTCODE"],
+                "property_type": eligibility.epc["property-type"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "cavity_type": eligibility.cavity["type"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                **eligibility.walls,
+                **eligibility.roof,
+            }
+        )
+
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+
+        # If the property is an F or G, it only needs to upgrade to an %
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+    return results_df, scoring_data, nodata
+
+
+def analyse_results(results_df, data, eco4_prospects_survey_list):
+    """
+    Join the eligibility results onto the survey data and slice out the cohorts of interest.
+
+    Nothing is returned - the cohorts only exist as locals inside this function.
+    :param results_df: Eligibility results, merged on row_id
+    :param data: Survey data; the row_id, survey_key and warmfront_identified columns are used
+    :param eco4_prospects_survey_list: Prospect list; survey_key, ADDRESS 1, NO and POSTCODE are used
+    """
+    survey_columns = ["row_id", "survey_key", "warmfront_identified"]
+    prospect_columns = ["survey_key", "ADDRESS 1", "NO", "POSTCODE"]
+
+    analysis_data = (
+        data[survey_columns]
+        .merge(results_df, how="left", on="row_id")
+        .merge(eco4_prospects_survey_list[prospect_columns], how="left", on="survey_key")
+    )
+
+    # Replace NaN with None before the thickness is parsed
+    raw_thickness = analysis_data["roof_insulation_thickness"]
+    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
+
+    def pitched_roof_thickness(thickness):
+        return convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
+
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        pitched_roof_thickness
+    )
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"] == True]
+
+    # Because we don't know which property is for which scheme, we'll just look at what we found
+    is_eco4 = analysis_data["eco4_eligible"] == True
+    thin_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
+    low_sap = analysis_data["sap"] <= 54
+    ideal_eco4 = analysis_data[is_eco4 & thin_roof_insulation & low_sap]
+
+    # GBIS candidates are everything GBIS-eligible that was not already picked up as ideal ECO4
+    is_gbis = analysis_data["gbis_eligible"] == True
+    already_ideal_eco4 = analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
+    gbis = analysis_data[is_gbis & ~already_ideal_eco4]
+
+    ideal_eco4 = ideal_eco4[ideal_eco4["sap"] <= 54]
+
+
+def analyse_lost_surveys(results_df):
+    """
+    Slice the results into the "ideal ECO4" and GBIS cohorts.
+
+    Note that results_df is modified in place: roof_insulation_thickness has NaN replaced by None and a
+    roof_insulation_thickness_numeric column is added. Nothing is returned.
+    :param results_df: Eligibility results
+    """
+    raw_thickness = results_df["roof_insulation_thickness"]
+    results_df["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
+
+    def pitched_roof_thickness(thickness):
+        return convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
+
+    results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply(
+        pitched_roof_thickness
+    )
+
+    is_eco4 = results_df["eco4_eligible"] == True
+    thin_roof_insulation = results_df["roof_insulation_thickness_numeric"] <= 100
+    low_sap = results_df["sap"] <= 54
+    ideal_eco4 = results_df[is_eco4 & thin_roof_insulation & low_sap]
+
+    # GBIS candidates exclude anything already counted as ideal ECO4
+    is_gbis = results_df["gbis_eligible"] == True
+    already_ideal_eco4 = results_df["row_id"].isin(ideal_eco4["row_id"].values)
+    gbis = results_df[is_gbis & ~already_ideal_eco4]
+
+
+def app():
+    """
+    Entry point: load the survey data, run the EPC eligibility pipeline and then load the saved outputs
+    from the local pickle.
+    """
+    bucket = "retrofit-data-dev"
+
+    data, eco4_prospects_survey_list, lost_identified_properties = load_data()
+    data["row_id"] = [f"ha25_{i}" for i in range(len(data))]
+
+    cleaned = msgpack.unpackb(
+        read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket),
+        raw=False,
+    )
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name=bucket, file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    created_at = datetime.now().isoformat()
+
+    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)
+
+    results_df, scoring_data, nodata = get_epc_data(
+        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
+    )
+
+    # The outputs above were previously pickled to "ha25_10_jan.pickle" as a dict with the keys
+    # results_df, scoring_data and nodata (older runs used ha25.pickle).
+    # NOTE(review): the freshly computed outputs are replaced by the pickled ones below - confirm
+    # whether both steps are meant to run in the same invocation.
+    import pickle
+    with open("ha25_10_jan.pickle", "rb") as f:
+        saved = pickle.load(f)
+    results_df, scoring_data, nodata = (saved[key] for key in ("results_df", "scoring_data", "nodata"))
--- a/etl/eligibility/ha_15_32/ha33_app.py
+++ b/etl/eligibility/ha_15_32/ha33_app.py
@ -264,21 +264,21 @@ def get_ha_33data(data, cleaned, cleaning_data, created_at):


 def analyse_ha_33(results_df, data):
-    results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
+    # results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
+    #
+    # results_df_social["tenure"].value_counts()

-    results_df_social["tenure"].value_counts()
+    data[data["row_id"].isin(results_df["row_id"].values)]["PROPERTY TYPE"].value_counts()

-    data[data["row_id"].isin(results_df_social["row_id"].values)]["PROPERTY TYPE"].value_counts()
+    n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
+    n_eco4 = results_df["eco4_eligible"].sum()
+    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()

-    n_identified = (results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]).sum()
-    n_eco4 = results_df_social["eco4_eligible"].sum()
-    n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum()
-
-    eco_eligibile = results_df_social[results_df_social["eco4_eligible"]]
+    eco_eligibile = results_df[results_df["eco4_eligible"]]
    eco_eligibile["walls"].value_counts()
    eco_eligibile["roof"].value_counts()

-    results_df_social[results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]]["tenure"].value_counts()
+    results_df[results_df["gbis_eligible"] | results_df["eco4_eligible"]]["tenure"].value_counts()

    results_df_social["eligibility_classification"].value_counts()

@ -316,3 +316,11 @@ def app():
    created_at = datetime.now().isoformat()

    results_df, _, _ = get_ha_33data(data, cleaned, cleaning_data, created_at)
+
+    # Read in
+    import pickle
+    with open("ha33_results.pickle", "rb") as f:
+        data = pickle.load(f)
+    results_df = pd.DataFrame(data["results"])
+    scoring_data = data["scoring_data"]
+    nodata = data["nodata"]
--- a/etl/eligibility/ha_15_32/ha4_app.py
+++ b/etl/eligibility/ha_15_32/ha4_app.py
@ -0,0 +1,328 @@
+import os
+import msgpack
+from pathlib import Path
+from datetime import datetime
+import numpy as np
+import pandas as pd
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+import re
+
+# NOTE(review): this file already lives in etl/eligibility/ha_15_32, so appending that path again to
+# Path(__file__).parent looks like it points at a nested location - confirm where the .env file is.
+ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+
+logger = setup_logger()
+load_dotenv(ENV_FILE)
+
+# Read the token only after load_dotenv has run, otherwise a value that is defined solely in the .env
+# file is not yet in the environment and the token ends up as None.
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def load_ha_4():
+    """
+    Read the HA 4 asset list from the local csv.
+
+    As a side effect the pandas display options are widened.
+    :return: The asset list as a DataFrame
+    """
+    display_options = (
+        ('display.max_rows', 500),
+        ('display.max_columns', 500),
+        ('display.width', 1000),
+    )
+    for option_name, option_value in display_options:
+        pd.set_option(option_name, option_value)
+
+    return pd.read_csv("etl/eligibility/ha_15_32/HA 4 Asset List.csv", low_memory=False)
+
+
+def standardise_ha_4(data):
+    """
+    Clean the location names and drop rows with an unusable postcode.
+
+    The input frame is left untouched; all changes are made on a copy.
+    :param data: Asset list with "Location Name" and "Post Code" columns
+    :return: A cleaned copy of data
+    """
+    # Work on a copy so the caller's frame is not modified
+    data = data.copy()
+
+    # Location name contains some strings like {0664} which we remove.
+    # Raw string so the regex escapes are not treated as (invalid) string escapes.
+    data["Location Name"] = data["Location Name"].str.replace(r"\{.*?\}", "", regex=True)
+
+    # Trim whitespace from either end of location name
+    data["Location Name"] = data["Location Name"].str.strip()
+
+    # Remove any unusable postcodes (recorded as two backslashes)
+    data = data[data["Post Code"] != '\\\\'].copy()
+
+    # Some specific replacements
+    data["Location Name"] = np.where(
+        data["Location Name"] == "Calderbrook Pl & Cog La",
+        "Calderbrook Place",
+        data["Location Name"]
+    )
+
+    return data
+
+
+def _classify_post_install_sap(current_sap, post_install_sap):
+    """
+    Grade a predicted post-install SAP score against the confidence thresholds.
+    :param current_sap: SAP score before any measures
+    :param post_install_sap: Predicted SAP score after measures
+    :return: "highest confidence", "high confidence", "medium confidence" or "unlikely"
+    """
+    # The thresholds depend on the current SAP: 38 and below is graded against the lower set
+    if current_sap <= 38:
+        highest, high, medium = 57, 55, 53
+    else:
+        highest, high, medium = 71, 69, 67
+
+    if post_install_sap >= highest:
+        return "highest confidence"
+    if post_install_sap >= high:
+        return "high confidence"
+    if post_install_sap >= medium:
+        return "medium confidence"
+    # Also reached when post_install_sap is NaN, because every comparison above is then False
+    return "unlikely"
+
+
+def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    """
+    Search for EPCs for every entry in the asset list, check eligibility for each EPC found and score the
+    ECO4-eligible ones with the SAP change model.
+    :param data: Asset list with "Address Line 1", "Location Name" and "Post Code" columns
+    :param cleaned: Cleaned EPC lookup, passed to Eligibility and prepare_model_data_row
+    :param cleaning_data: Cleaning dataset used for the averages cleaning
+    :param created_at: Timestamp for the run
+    :param photo_supply_lookup: Passed through to prepare_model_data_row
+    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
+    :return: results_df (one row per uprn), scoring_data (list of model rows) and nodata (the asset list
+        rows, as dicts, for which no EPC data was found)
+    """
+    scoring_data = []
+    results = []
+    nodata = []
+    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+        # For many of the entries in this dataset, we're actually given an entire building, so we collect
+        # the EPCs for every property in the building
+        searcher = SearchEpc(
+            address1=property_meta["Address Line 1"],
+            postcode=property_meta["Post Code"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            # No property type lookup is defined for this dataset, so search without one
+            property_type=None,
+        )
+
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            # Retry using the location name in place of the first address line
+            searcher = SearchEpc(
+                address1=property_meta["Location Name"],
+                postcode=property_meta["Post Code"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key=None,
+                property_type=None,
+            )
+            # NOTE(review): only search() is called on the retry - confirm that it populates newest_epc,
+            # otherwise the retry can never pass the check below
+            searcher.search()
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta.to_dict())
+            continue
+
+        searcher.search()
+
+        if searcher.data is None:
+            nodata.append(property_meta.to_dict())
+            continue
+
+        epcs = pd.DataFrame(searcher.data["rows"])
+
+        # Take the newest EPC by UPRN
+        epcs = epcs.sort_values(by=["lodgement-date"], ascending=False)
+        newest_epcs = epcs.drop_duplicates(subset=["uprn"], keep="first")
+
+        # For each EPC, we now check eligibility
+        for _, epc in newest_epcs.iterrows():
+            eligibility = Eligibility(epc=epc.to_dict(), cleaned=cleaned)
+            eligibility.check_gbis_warmfront()
+            eligibility.check_eco4_warmfront()
+
+            # If the house is not identified, we do a full gbis and eco4 check
+            eligibility.check_gbis()
+            eligibility.check_eco4()
+
+            if eligibility.eco4_warmfront["eligible"]:
+                # Every other EPC lodged against the same UPRN
+                old_data = epcs[
+                    (epcs["uprn"] == epc["uprn"]) &
+                    (epcs["lmk-key"] != epc["lmk-key"])
+                    ].to_dict("records")
+
+                full_sap_epc = epcs[
+                    (epcs["uprn"] == epc["uprn"]) &
+                    (epcs["transaction-type"] == "new dwelling")
+                    ].to_dict("records")
+
+                scoring_dictionary = prepare_model_data_row(
+                    property_id=eligibility.epc["uprn"],
+                    modelling_epc=eligibility.epc,
+                    cleaned=cleaned,
+                    cleaning_data=cleaning_data,
+                    created_at=created_at,
+                    old_data=old_data,
+                    full_sap_epc=full_sap_epc,
+                    photo_supply_lookup=photo_supply_lookup,
+                    floor_area_decile_thresholds=floor_area_decile_thresholds
+                )
+                scoring_data.extend(scoring_dictionary)
+
+            results.append(
+                {
+                    "uprn": epc["uprn"],
+                    "Location Name": property_meta["Location Name"],
+                    "Post Code": property_meta["Post Code"],
+                    "property_type": eligibility.epc["property-type"],
+                    "gbis_eligible": eligibility.gbis_warmfront,
+                    "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                    "eco4_message": eligibility.eco4_warmfront["message"],
+                    "sap": float(eligibility.epc["current-energy-efficiency"]),
+                    "gbis_eligible_future": eligibility.gbis["eligible"],
+                    "gbis_eligible_future_message": eligibility.gbis["message"],
+                    "eco4_eligible_future": eligibility.eco4["eligible"],
+                    "eco4_eligible_future_message": eligibility.eco4["message"],
+                    # Property components
+                    "roof": eligibility.roof["clean_description"],
+                    "walls": eligibility.walls["clean_description"],
+                    "cavity_type": eligibility.cavity["type"],
+                    "heating": eligibility.epc["mainheat-description"],
+                    "tenure": eligibility.tenure,
+                    "date_epc": eligibility.epc["lodgement-date"],
+                }
+            )
+
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+
+    # NOTE(review): the portfolio id still says ha33 although this is the HA 4 run - confirm
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    # The uplift is the sum, per property, of each prediction's difference from the current SAP
+    predictions = predictions.rename(columns={"property_id": "uprn"}).merge(
+        results_df[["uprn", "sap"]], how="left", on="uprn"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("uprn")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "uprn"]],
+        how="left",
+        on="uprn"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    results_df = results_df[~pd.isnull(results_df["uprn"])]
+
+    # The upgrade requirements are dependent on the current SAP
+    eco4_rows = results_df[results_df["eco4_eligible"] == True]
+    eligibility_assessment = pd.DataFrame(
+        [
+            {
+                "uprn": row["uprn"],
+                "eligibility_classification": _classify_post_install_sap(row["sap"], row["post_install_sap"])
+            }
+            for _, row in eco4_rows.iterrows()
+        ],
+        # Name the columns explicitly so the merge below still finds "uprn" when there are no rows
+        columns=["uprn", "eligibility_classification"],
+    )
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="uprn"
+    )
+    # We have some properties that are duplicated so we take just one instance
+    results_df = results_df.drop_duplicates(subset=["uprn"])
+
+    return results_df, scoring_data, nodata
+
+
+def analyse_ha_4(results_df, data):
+    """
+    Count the currently eligible properties and slice out the ones that are only eligible under the
+    full (future) checks. Nothing is returned - the figures only exist as locals.
+    :param results_df: Eligibility results
+    :param data: Not used
+    """
+    gbis_now = results_df["gbis_eligible"]
+    eco4_now = results_df["eco4_eligible"]
+    identified_now = gbis_now | eco4_now
+
+    n_identified = identified_now.sum()
+    n_eco4 = eco4_now.sum()
+    # GBIS is only counted where the property is not already ECO4 eligible
+    n_gbis = results_df[~eco4_now]["gbis_eligible"].sum()
+
+    eco_eligibile = results_df[eco4_now]
+    eco_eligibile["eligibility_classification"].value_counts()
+
+    # Future possibilities: eligible under the full checks but not identified today
+    eco4_in_future = results_df["eco4_eligible_future"] == True
+    future_possibilities_eco = results_df[eco4_in_future & ~identified_now].copy()
+
+    gbis_in_future = results_df["gbis_eligible_future"] == True
+    not_eco4_in_future = results_df["eco4_eligible_future"] == False
+    future_possibilities_gbis = results_df[gbis_in_future & not_eco4_in_future & ~identified_now].copy()
+
+    total_future_possibilities = len(future_possibilities_eco.index) + len(future_possibilities_gbis.index)
+
+
+def app():
+    """
+    Entry point: load and standardise the HA 4 asset list, fetch the supporting datasets from S3 and run
+    the eligibility pipeline.
+    """
+    bucket = "retrofit-data-dev"
+
+    data = standardise_ha_4(load_ha_4())
+    data["row_id"] = [f"h4{i}" for i in range(len(data))]
+
+    cleaned = msgpack.unpackb(
+        read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket),
+        raw=False,
+    )
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name=bucket, file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    created_at = datetime.now().isoformat()
+
+    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)
+
+    results_df, scoring_data, nodata = get_ha_4_data(
+        data=data,
+        cleaned=cleaned,
+        cleaning_data=cleaning_data,
+        created_at=created_at,
+        photo_supply_lookup=photo_supply_lookup,
+        floor_area_decile_thresholds=floor_area_decile_thresholds
+    )
+
+    # The three outputs have been cached locally in "ha_4.pickle" as a dict with the keys results_df,
+    # scoring_data and nodata; both the dump and the reload are currently switched off.
--- a/etl/eligibility/ha_15_32/ha7_app.py
+++ b/etl/eligibility/ha_15_32/ha7_app.py
@ -0,0 +1,383 @@
+import os
+import msgpack
+import openpyxl
+from openpyxl.styles.colors import COLOR_INDEX
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+# NOTE(review): this file already lives in etl/eligibility/ha_15_32 - confirm the .env location
+ENV_FILE = Path(__file__).parent.joinpath("etl", "eligibility", "ha_15_32", ".env")
+
+logger = setup_logger()
+load_dotenv(ENV_FILE)
+
+# Read after load_dotenv so that values defined only in the .env file are available
+EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")
+OS_API_KEY = os.environ.get("ORDNANCE_SURVEY_API_KEY")
+
+
+def load_data():
+    """
+    Load the HA 7 asset list from the excel, tagging every row with the fill colour of its first cell.
+
+    The fill colour is translated into a colour name (row_colour_name) and a status (row_code). Rows
+    without a fill get a row_color of None.
+    :return: DataFrame of the sheet contents plus row_color, row_colour_name and row_code
+    """
+
+    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 7 ASSET LIST.xlsx')
+    sheet = workbook.active
+
+    # Prepare lists to collect rows data and their colors
+    rows_data = []
+    rows_colors = []
+    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        rows_data.append([cell.value for cell in row])
+
+        color_index = row[0].fill.start_color.index
+        if isinstance(color_index, int) and 0 <= color_index < len(COLOR_INDEX):
+            # Integer index: look the colour up in the palette
+            # NOTE(review): theme colours also report an integer here - confirm the sheet has none
+            row_color = COLOR_INDEX[color_index]
+        elif isinstance(color_index, str) and color_index != '00000000':
+            # Already a colour string, which cannot be used to index the palette
+            row_color = color_index
+        else:
+            # '00000000' means no fill was found
+            row_color = None
+        rows_colors.append(row_color)
+
+    # The ninth column is renamed by position; build the names up front rather than writing into the
+    # frame's column index in place
+    columns = [cell.value for cell in sheet[1]]
+    columns[8] = "is_active"
+    df = pd.DataFrame(rows_data, columns=columns)
+
+    # Remove None columns. This happens before the colours are added so that row_color survives even
+    # when no row has a fill
+    df = df.dropna(axis=1, how='all')
+
+    # Add the row colors as a new column
+    df['row_color'] = rows_colors
+
+    # We now parse the colours; anything that is not one of the two known codes is treated as yellow
+    # NOTE(review): that includes rows without any fill - confirm this is the wanted default
+    df["row_colour_name"] = np.where(
+        df["row_color"] == "0000FFFF", "red",
+        np.where(df["row_color"] == "00FF00FF", "green", "yellow")
+    )
+    df["row_code"] = np.where(
+        df["row_colour_name"] == "red", "invalid",
+        np.where(df["row_colour_name"] == "green", "potential ECO4", "needs criteria change")
+    )
+
+    return df
+
+
+def _classify_post_install_sap(current_sap, post_install_sap):
+    """
+    Grade a predicted post-install SAP score against the confidence thresholds.
+    :param current_sap: SAP score before any measures
+    :param post_install_sap: Predicted SAP score after measures
+    :return: "highest confidence", "high confidence", "medium confidence" or "unlikely"
+    """
+    # The thresholds depend on the current SAP: 38 and below is graded against the lower set
+    if current_sap <= 38:
+        highest, high, medium = 57, 55, 53
+    else:
+        highest, high, medium = 71, 69, 67
+
+    if post_install_sap >= highest:
+        return "highest confidence"
+    if post_install_sap >= high:
+        return "high confidence"
+    if post_install_sap >= medium:
+        return "medium confidence"
+    return "unlikely"
+
+
+def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    """
+    Find the newest EPC for every property in the asset list, check its eligibility and score the
+    ECO4-eligible properties with the SAP change model.
+    :param data: Asset list with row_id, Address, Address2, Postcode and Archetype columns
+    :param cleaned: Cleaned EPC lookup, passed to Eligibility, calculate_cavity_age and the model row prep
+    :param cleaning_data: Cleaning dataset used for the averages cleaning
+    :param created_at: Timestamp for the run
+    :param photo_supply_lookup: Passed through to prepare_model_data_row
+    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
+    :return: results_df, scoring_data (list of model rows) and nodata (row ids with no EPC found)
+    """
+    # Only these archetypes are mapped; for anything else the lookup gives None
+    property_type_lookup = {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+    }
+
+    scoring_data = []
+    results = []
+    nodata = []
+    for _, house in tqdm(data.iterrows(), total=len(data)):
+
+        # Fall back to the second address field when the first one is empty
+        address = house["Address"] if house["Address"] else house["Address2"]
+
+        searcher = SearchEpc(
+            address1=address,
+            postcode=house["Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            property_type=property_type_lookup.get(house["Archetype"]),
+        )
+        searcher.find_property(skip_os=True)
+
+        newest_epc = searcher.newest_epc
+        if newest_epc is None:
+            nodata.append(house["row_id"])
+            continue
+
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+
+        # A filled cavity wall that failed ECO4 on the cavity gets an estimated cavity age, so that
+        # particularly old cavities can be flagged. Loft MUST be suitable
+        needs_cavity_age = (
+            eligibility.walls["is_cavity_wall"]
+            and eligibility.walls["is_filled_cavity"]
+            and eligibility.loft["suitability"]
+            and eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        )
+        cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned) if needs_cavity_age else None
+
+        # If the house is not identified, we do a full gbis and eco4 check
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        if eligibility.eco4_warmfront["eligible"]:
+            scoring_data.extend(
+                prepare_model_data_row(
+                    property_id=house["row_id"],
+                    modelling_epc=eligibility.epc,
+                    cleaned=cleaned,
+                    cleaning_data=cleaning_data,
+                    created_at=created_at,
+                    old_data=older_epcs,
+                    full_sap_epc=full_sap_epc,
+                    photo_supply_lookup=photo_supply_lookup,
+                    floor_area_decile_thresholds=floor_area_decile_thresholds
+                )
+            )
+
+        # Every property with an EPC gets a record, whatever the eligibility outcome. Key order
+        # matters: the unpacked dicts overwrite any earlier key of the same name
+        record = {
+            "row_id": house["row_id"],
+            "address": house["Address"],
+            "postcode": house["Postcode"],
+            "gbis_eligible": eligibility.gbis_warmfront,
+            "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+            "eco4_message": eligibility.eco4_warmfront["message"],
+            "sap": float(eligibility.epc["current-energy-efficiency"]),
+            "gbis_eligible_future": eligibility.gbis["eligible"],
+            "gbis_eligible_future_message": eligibility.gbis["message"],
+            "eco4_eligible_future": eligibility.eco4["eligible"],
+            "eco4_eligible_future_message": eligibility.eco4["message"],
+            # Property components
+            "roof": eligibility.roof["clean_description"],
+            "walls": eligibility.walls["clean_description"],
+            "heating": eligibility.epc["mainheat-description"],
+            "tenure": eligibility.tenure,
+            "date_epc": eligibility.epc["lodgement-date"],
+            **newest_epc,
+            "cavity_age": cavity_age,
+            **eligibility.walls,
+            **eligibility.roof,
+        }
+        results.append(record)
+
+    # Implement the same process that is being used in the recommendation engine to clean scoring_df
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    # Columns whose name contains any of these fragments are left out of the missing-value cleaning
+    ignored_fragments = ("thermal_transmittance", "insulation_thickness", "ENERGY_EFF")
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if any(fragment in c for fragment in ignored_fragments)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    sap_predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    # The uplift is the sum, per property, of each prediction's difference from the current SAP
+    sap_predictions = sap_predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    sap_predictions["sap_uplift"] = sap_predictions["predictions"] - sap_predictions["sap"]
+    uplift_by_row = sap_predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(uplift_by_row[["sap_uplift", "row_id"]], how="left", on="row_id")
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    # The upgrade requirements are dependent on the current SAP
+    eco4_rows = results_df[results_df["eco4_eligible"] == True]
+    eligibility_assessment = pd.DataFrame(
+        [
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": _classify_post_install_sap(row["sap"], row["post_install_sap"])
+            }
+            for _, row in eco4_rows.iterrows()
+        ]
+    )
+
+    results_df = results_df.merge(eligibility_assessment, how="left", on="row_id")
+
+    return results_df, scoring_data, nodata
+
+
+def analyse_ha_7(results_df, data):
+    """
+    Exploratory breakdown of the HA7 eligibility results.
+
+    The scored results are joined back onto the source data and sliced into a number of segments (ECO4 / GBIS
+    eligible, underperforming cavities, future possibilities, ...). Nothing is returned and several expressions
+    are evaluated without being stored, so this function is only useful when stepped through interactively.
+
+    :param results_df: Scored results. The code reads row_id, eco4_eligible, gbis_eligible, eco4_eligible_future,
+        gbis_eligible_future, eco4_message, cavity_age, roof_insulation_thickness, walls, date_epc and
+        eligibility_classification.
+    :param data: Source data. Must contain row_id, row_code, "Property Type" and "Construction Year Band".
+    :return: None
+    """
+    # Bring the source data's row code, property type and construction year band onto the results
+    analysis_data = results_df.merge(
+        data[["row_id", "row_code", "Property Type", "Construction Year Band"]], how="left", on="row_id"
+    )
+
+    # Result is discarded - inspection only
+    analysis_data["row_code"].value_counts()
+
+    # NEW
+
+    # Replace every missing thickness with None before converting the thickness to a number
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    # ECO4 eligible with a numeric roof insulation thickness of at most 100 (presumably mm - TODO confirm)
+    ideal_eco4 = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (
+            analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    # ECO4 eligible, but with a numeric roof insulation thickness above 100
+    secondary_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (
+            analysis_data["roof_insulation_thickness_numeric"] > 100)
+        ]
+
+    # Underperforming cavities: rejected for ECO4 because of a full cavity, with a cavity_age above 9 * 365
+    # (roughly nine years, assuming cavity_age is measured in days - TODO confirm) and roof thickness <= 100
+    underperforming_cavities = analysis_data[
+        (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
+            analysis_data["cavity_age"] > 9 * 365
+        ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    # GBIS eligible properties that are not ECO4 eligible
+    identified_gbis_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (
+            analysis_data["eco4_eligible"] == False
+        )
+        ]
+
+    # Rows whose row_code in the source data is "potential ECO4" (same filter as warmfront_identified below)
+    wf_identified = analysis_data[
+        (analysis_data["row_code"] == "potential ECO4")
+    ]
+
+    # END NEW
+
+    warmfront_identification = analysis_data["row_code"].value_counts()
+    warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
+    # Result is discarded - inspection only
+    warmfront_identified["walls"].value_counts(normalize=True)
+
+    # Result is discarded - inspection only
+    analysis_data["Construction Year Band"].value_counts(normalize=True)
+
+    # Number of days between the EPC date and today, for the "potential ECO4" rows
+
+    days_to_today = (datetime.now() - pd.to_datetime(warmfront_identified["date_epc"])).dt.days
+    # Result is discarded - inspection only
+    days_to_today.mean()
+
+    property_types = analysis_data["Property Type"].value_counts()
+
+    # Number of properties that are eligible for at least one of the two schemes
+    n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
+
+    eco_identified = results_df[results_df["eco4_eligible"]]
+    n_eco4 = eco_identified["eco4_eligible"].sum()
+    # GBIS is only counted for the properties that are not already ECO4 eligible
+    gbis_identified = results_df[~results_df["eco4_eligible"] & results_df["gbis_eligible"]]
+    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()
+
+    eco_eligibile = results_df[results_df["eco4_eligible"]]
+    # Result is discarded - inspection only
+    eco_eligibile["eligibility_classification"].value_counts()
+
+    # Properties that are not eligible for either scheme today, but are flagged as ECO4 eligible in the future
+    future_possibilities_eco = results_df[
+        (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
+        ].copy()
+
+    # Properties that are not eligible for either scheme today, and are flagged as GBIS (but not ECO4) eligible
+    # in the future
+    future_possibilities_gbis = results_df[
+        (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
+            ~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
+        ].copy()
+
+    total_future_possibilities = future_possibilities_eco.shape[0] + future_possibilities_gbis.shape[0]
+
+
+def app():
+    """
+    Load the HA7 source data together with the cleaned EPC lookup, the cleaning dataset and the solar photo
+    supply lookups, then pass everything to get_ha7_data.
+    """
+    bucket = "retrofit-data-dev"
+
+    properties = load_data()
+    # Give every row an identifier of the form ha7<position>
+    properties["row_id"] = [f"ha7{position}" for position in range(len(properties))]
+
+    packed_lookup = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket)
+    cleaned_lookup = msgpack.unpackb(packed_lookup, raw=False)
+
+    cleaning_dataset = read_dataframe_from_s3_parquet(
+        bucket_name=bucket,
+        file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    solar_lookups = SolarPhotoSupply.load(bucket=bucket)
+    photo_supply_lookup, floor_area_decile_thresholds = solar_lookups
+
+    run_timestamp = datetime.now().isoformat()
+
+    results_df, scoring_data, nodata = get_ha7_data(
+        properties,
+        cleaned_lookup,
+        cleaning_dataset,
+        run_timestamp,
+        photo_supply_lookup,
+        floor_area_decile_thresholds,
+    )
+
+    # Pickle results
+    # import pickle
+    # with open("ha7_results_jan_10.pkl", "wb") as f:
+    #     pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)
+
+    # Read in the old data
+    # import pickle
+    # with open("ha7_results_jan_10.pkl", "rb") as f:
+    #     old_data = pickle.load(f)
+    # results_df = old_data["results_df"]
+    # scoring_data = old_data["scoring_data"]
+    # nodata = old_data["nodata"]
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@ -766,12 +766,16 @@ class EPCDataProcessor:
            how='left'
        )

+        global_averages = cleaning_data[cols_to_clean].mean()
+
        # Fill NaN values with averages
        for col in cols_to_clean:
            data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
            data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
            # If we still have missings
            data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
+            # Final step if we still have missings - use global mean
+            data_to_clean[col].fillna(global_averages[col], inplace=True)

        return data_to_clean

--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@ -23,6 +23,12 @@ def main():
    pd.DataFrame(epc_pipeline.compiled_all_equal_rows).to_parquet("refactor_datasets/all_equal_rows.parquet")
    pd.concat(epc_pipeline.compiled_cleaning_averages).to_parquet("refactor_datasets/cleaning_averages.parquet")

+    from utils.s3 import read_dataframe_from_s3_parquet
+    dataset = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev",
+        file_key="sap_change_model/dataset_test.parquet",
+    )
+

 if __name__ == "__main__":
    main()
--- a/etl/epc_clean/epc_attributes/MainheatAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py
@ -16,6 +16,7 @@ class MainHeatAttributes(Definitions):
        "solar assisted heat pump",
        "exhaust source heat pump",
        "community heat pump",
+        "portable electric heating"
    ]
    FUEL_TYPES = ["electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite",
                  "dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k"]
--- a/etl/epc_clean/epc_attributes/WallAttributes.py
+++ b/etl/epc_clean/epc_attributes/WallAttributes.py
@ -152,4 +152,7 @@ class WallAttributes(Definitions):
            else:
                result["insulation_thickness"] = "average"

+        if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"):
+            result["is_filled_cavity"] = True
+
        return result
--- a/etl/epc_clean/epc_attributes/WindowAttributes.py
+++ b/etl/epc_clean/epc_attributes/WindowAttributes.py
@ -52,7 +52,7 @@ class WindowAttributes(Definitions):
                raise ValueError('Invalid description')

    def process(self) -> Dict[str, Union[str, bool]]:
-        result: Dict[str, Union[str, bool]] = {
+        result: Dict[str, Union[str, bool, None]] = {
            "has_glazing": False,
            "glazing_coverage": None,
            "glazing_type": None,
@ -80,7 +80,11 @@ class WindowAttributes(Definitions):
                        break

        # If we didn't find any coverage or type, we assume full coverage
-        if not result["glazing_coverage"]:
+        if (not result["glazing_coverage"]) & (result["glazing_type"] != "single"):
            result["glazing_coverage"] = "full"

+        # We reset some values if the glazing is single
+        if result["glazing_type"] == "single":
+            result["has_glazing"] = False
+
        return result
--- a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py
@ -1652,4 +1652,17 @@ mainheat_cases = [
     'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
     "has_electric_heat_pumps": False,
     "has_micro-cogeneration": False},
+    {'original_description': 'Portable electric heating assumed for most rooms', 'has_radiators': False,
+     'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
+     'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
+     'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
+     'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
+     'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
+     'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False,
+     'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False,
+     'has_community_heat_pump': False, 'has_portable_electric_heating': True, 'has_electric': True,
+     'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
+     'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
+     'has_b30k': False, 'has_assumed': True, 'has_electricaire': False, 'has_assumed_for_most_rooms': True,
+     'has_underfloor_heating': False}
 ]
--- a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
@ -550,7 +550,7 @@ wall_cases = [
     'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False,
     'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False},
    {'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None,
-     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
     'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
     'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
     'external_insulation': False, 'internal_insulation': False},
@ -727,7 +727,7 @@ wall_cases = [
     'external_insulation': False, 'internal_insulation': False},
    {'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
     'thermal_transmittance': None,
-     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
     'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
     'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
     'external_insulation': False, 'internal_insulation': False},
--- a/etl/epc_clean/tests/test_data/test_window_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_window_attributes_cases.py
@ -30,7 +30,8 @@ windows_cases = [
     'glazing_type': 'triple', 'no_data': False},
    {'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial',
     'glazing_type': 'triple', 'no_data': False},
-    {'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
+    {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
+     'glazing_type': 'single',
     'no_data': False},
    {'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
     'glazing_type': 'double', 'no_data': False},
@ -46,7 +47,8 @@ windows_cases = [
     'glazing_type': 'double', 'no_data': False},
    {'original_description': 'Gwydrau dwbl gan mwyaf', 'has_glazing': True, 'glazing_coverage': 'most',
     'glazing_type': 'double', 'no_data': False},
-    {'original_description': 'Gwydrau sengl', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
+    {'original_description': 'Gwydrau sengl', 'has_glazing': False, 'glazing_coverage': None,
+     'glazing_type': 'single',
     'no_data': False},
    {'original_description': 'Ffenestri perfformiad uchel', 'has_glazing': True, 'glazing_coverage': 'full',
     'glazing_type': 'high performance', 'no_data': False},
--- a/etl/epc_clean/tests/test_roof_attributes.py
+++ b/etl/epc_clean/tests/test_roof_attributes.py
@ -3,12 +3,13 @@ from pathlib import Path
 from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases
 from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes

+
 # For local testing
-if __file__ == "<input>":
-    input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
-else:
-    current_file_path = Path(__file__)
-    input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
+# if __file__ == "<input>":
+#     input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
+# else:
+#     current_file_path = Path(__file__)
+#     input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'


 class TestRoofAttributes:
@ -88,7 +89,12 @@ class TestRoofAttributes:

    def test_clean_roof_no_description(self):
        roof = RoofAttributes('').process()
-        assert roof == {}
+        assert roof == {
+            'thermal_transmittance': False, 'thermal_transmittance_unit': False, 'is_pitched': False,
+            'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False,
+            'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': False,
+            'insulation_thickness': False
+        }

    def test_clean_roof_edge_cases(self):
        # Insulation thickness edge case
--- a/etl/solar/SolarPhotoSupply.py
+++ b/etl/solar/SolarPhotoSupply.py
@ -0,0 +1,244 @@
+import pandas as pd
+from tqdm import tqdm
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class SolarPhotoSupply:
+    DATASET_COLUMNS = [
+        "UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA",
+        "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"
+    ]
+
+    def __init__(self, file_directories, cleaned_lookup):
+        """
+        Set up the client with the directories to read and the cleaned description lookup. Only locally stored
+        data is supported at the moment; reading the inputs from S3 would be a possible extension.
+
+        :param file_directories: A list of directories where files are stored.
+        :param cleaned_lookup: A dictionary containing cleaned lookup data; its "roof-description" entry is used.
+        """
+        # Outputs - these start out empty and are populated by create_dataset
+        self.photo_supply_lookup = pd.DataFrame()
+        self.floor_area_decile_thresholds = pd.DataFrame()
+        self.decile_thresholds = None
+        self.results = []
+
+        # Inputs
+        roof_descriptions = cleaned_lookup.get("roof-description")
+        self.roof_lookup = pd.DataFrame(roof_descriptions)
+        self.file_directories = file_directories
+
+    def create_dataset(self):
+        """
+        Build the photo supply lookup from the certificates.csv file found in each configured directory.
+
+        Per directory, the newest certificate per UPRN is kept and only properties with a non-zero PHOTO_SUPPLY
+        are retained. The combined data is bucketed into floor area deciles, joined to the roof lookup and
+        aggregated to the median / mean PHOTO_SUPPLY per group. The outputs are stored on photo_supply_lookup
+        and floor_area_decile_thresholds.
+
+        :raises ValueError: If there is no roof lookup data.
+        """
+
+        if self.roof_lookup.empty:
+            raise ValueError("No roof lookup data")
+
+        required_columns = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]
+        per_directory = []
+
+        logger.info("Creating solar photo supply dataset")
+        for directory in tqdm(self.file_directories):
+            certificates = pd.read_csv(directory / "certificates.csv", low_memory=False)
+            certificates = certificates[certificates["UPRN"].notna()]
+            certificates["UPRN"] = certificates["UPRN"].astype(int).astype(str)
+            # Rows missing any of the grouping attributes cannot contribute to the lookup
+            certificates = certificates.dropna(subset=required_columns)
+            # Sorting newest first means drop_duplicates keeps the latest LODGEMENT_DATE per UPRN
+            newest = certificates.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
+
+            with_solar = newest[self.DATASET_COLUMNS].copy()
+            with_solar["PHOTO_SUPPLY"] = with_solar["PHOTO_SUPPLY"].fillna(0)
+            per_directory.append(with_solar[with_solar["PHOTO_SUPPLY"] != 0])
+
+        self.results = pd.concat(per_directory)
+
+        # The nine inner decile boundaries of the floor area
+        decile_quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+        self.decile_thresholds = self.results["TOTAL_FLOOR_AREA"].quantile(decile_quantiles).values
+
+        bin_edges = [0, *self.decile_thresholds, float('inf')]
+        self.results["floor_area_decile"] = pd.cut(
+            self.results["TOTAL_FLOOR_AREA"],
+            bins=bin_edges,
+            labels=False,
+            include_lowest=True
+        )
+
+        # Tenure is stored in lower case
+        self.results["TENURE"] = self.results["TENURE"].str.lower()
+
+        # Only the roof shape flags are needed from the roof lookup
+        unused_roof_columns = [
+            "clean_description", "thermal_transmittance", "thermal_transmittance_unit", "insulation_thickness",
+            "is_assumed"
+        ]
+        roof_features = self.roof_lookup.drop(columns=unused_roof_columns)
+        self.results = self.results.merge(
+            roof_features,
+            left_on="ROOF_DESCRIPTION",
+            right_on="original_description",
+            how="left"
+        )
+
+        grouping_columns = [
+            "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat",
+            "CONSTRUCTION_AGE_BAND", "floor_area_decile"
+        ]
+        aggregated = self.results.groupby(grouping_columns, observed=True).agg(
+            {
+                "PHOTO_SUPPLY": ["median", "mean"],
+            }
+        )
+        self.photo_supply_lookup = aggregated.reset_index()
+
+        # Flatten the two level column index: join the levels with an underscore, drop the trailing underscore
+        # left behind by the grouping columns and convert everything to lower case
+        flattened_columns = []
+        for column_levels in self.photo_supply_lookup.columns.values:
+            column_name = '_'.join(column_levels).strip()
+            if column_name.endswith("_"):
+                column_name = column_name[:-1]
+            flattened_columns.append(column_name.lower())
+        self.photo_supply_lookup.columns = flattened_columns
+
+        self.floor_area_decile_thresholds = pd.DataFrame(
+            self.decile_thresholds,
+            columns=["floor_area_decile_thresholds"]
+        )
+
+    @staticmethod
+    def classify_floor_area(new_area, thresholds):
+        """
+        Work out which decile a floor area falls into.
+
+        :param new_area: The new floor area to be classified.
+        :param thresholds: The upper bounds of the deciles, in the order they should be checked.
+        :return: The index of the first threshold that is greater than or equal to the area, or the number of
+            thresholds if the area is larger than all of them.
+        """
+
+        first_match = next(
+            (index for index, upper_bound in enumerate(thresholds) if new_area <= upper_bound),
+            None,
+        )
+        # No threshold is large enough, so the area belongs to the top decile
+        return len(thresholds) if first_match is None else first_match
+
+    def save(self, bucket="retrofit-data-dev"):
+        """
+        Save the photo supply lookup and the floor area decile thresholds to S3 as parquet files. The file keys
+        are the ones that load reads from.
+
+        :param bucket: The name of the S3 bucket to store the data in. Defaults to the bucket that was
+            previously hard coded, so existing callers are unaffected.
+        :raises ValueError: If the photo supply lookup is empty, i.e. there is nothing to save.
+        """
+        if self.photo_supply_lookup.empty:
+            raise ValueError("No data to save")
+
+        logger.info("Storing outputs to S3")
+
+        save_dataframe_to_s3_parquet(
+            df=self.photo_supply_lookup,
+            bucket_name=bucket,
+            file_key="solar_pv_supply/photo_supply_lookup.parquet",
+        )
+
+        save_dataframe_to_s3_parquet(
+            df=self.floor_area_decile_thresholds,
+            bucket_name=bucket,
+            file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
+        )
+
+    @staticmethod
+    def load(bucket):
+        """
+        Read the two stored solar datasets back from S3.
+
+        :param bucket: The name of the S3 bucket to load data from.
+        :return: A tuple of (photo supply lookup, floor area decile thresholds) dataframes.
+        """
+        file_keys = (
+            "solar_pv_supply/photo_supply_lookup.parquet",
+            "solar_pv_supply/floor_area_decile_thresholds.parquet",
+        )
+        # The files are read in the order listed above
+        photo_supply_lookup, floor_area_decile_thresholds = (
+            read_dataframe_from_s3_parquet(bucket_name=bucket, file_key=file_key) for file_key in file_keys
+        )
+
+        return photo_supply_lookup, floor_area_decile_thresholds
+
+    @classmethod
+    def filter_photo_supply_lookup(
+        cls,
+        photo_supply_lookup: pd.DataFrame,
+        floor_area_decile_thresholds: pd.DataFrame,
+        tenure: str,
+        built_form: str,
+        property_type: str,
+        construction_age_band: str,
+        is_flat: bool,
+        is_pitched: bool,
+        is_roof_room: bool,
+        floor_area: float
+    ):
+
+        """
+        Narrow the photo supply lookup down to the rows that best describe a given property.
+
+        An exact match on every attribute is tried first. If that finds nothing, the match falls back to tenure,
+        built form and property type, narrowed by construction age band when that band is present. The result
+        is finally narrowed to the property's floor area decile when that decile is present.
+
+        :param photo_supply_lookup: The photo supply lookup dataframe.
+        :param floor_area_decile_thresholds: The floor area decile thresholds dataframe.
+        :param tenure: The tenure of the property (any casing).
+        :param built_form: The built form of the property.
+        :param property_type: The property type of the property.
+        :param construction_age_band: The construction age band of the property.
+        :param is_flat: Whether the property has a flat roof.
+        :param is_pitched: Whether the property has a pitched roof.
+        :param is_roof_room: Whether the property has a roof room.
+        :param floor_area: The floor area of the property.
+        :return: The matching rows of photo_supply_lookup (this can be more than one row).
+        :raises ValueError: If not even the fallback match finds a row.
+        """
+
+        # Tenure is lower cased when the dataset is created, so the input is normalised in the same way
+        tenure = tenure.lower()
+
+        # The full "not defined" text is mapped onto a shortened version of itself
+        # NOTE(review): presumably the stored tenure values are truncated - confirm
+        full_not_defined = (
+            "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is not to "
+            "be used for an existing dwelling"
+        )
+        if tenure == full_not_defined:
+            tenure = (
+                "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is no"
+            )
+
+        same_tenure = photo_supply_lookup["tenure"] == tenure
+        same_built_form = photo_supply_lookup["built_form"] == built_form
+        same_property_type = photo_supply_lookup["property_type"] == property_type
+        coarse_match = same_tenure & same_built_form & same_property_type
+
+        exact_match = (
+            coarse_match
+            & (photo_supply_lookup["construction_age_band"] == construction_age_band)
+            & (photo_supply_lookup["is_flat"] == is_flat)
+            & (photo_supply_lookup["is_pitched"] == is_pitched)
+            & (photo_supply_lookup["is_roof_room"] == is_roof_room)
+        )
+        photo_supply_matched = photo_supply_lookup[exact_match]
+
+        if photo_supply_matched.empty:
+            # A small number of properties have no exact match, so retry with a more aggregated average
+            photo_supply_matched = photo_supply_lookup[coarse_match]
+            known_age_bands = photo_supply_matched["construction_age_band"].values
+            if construction_age_band in known_age_bands:
+                photo_supply_matched = photo_supply_matched[
+                    photo_supply_matched["construction_age_band"] == construction_age_band
+                ]
+
+            if photo_supply_matched.empty:
+                raise ValueError("No photo supply matches")
+
+        thresholds = floor_area_decile_thresholds["floor_area_decile_thresholds"].values
+        floor_area_decile = cls.classify_floor_area(floor_area, thresholds)
+
+        # Only narrow by decile when that decile exists, otherwise keep every matched row
+        if floor_area_decile in photo_supply_matched["floor_area_decile"].values:
+            photo_supply_matched = photo_supply_matched[
+                photo_supply_matched["floor_area_decile"] == floor_area_decile
+            ]
+
+        return photo_supply_matched
--- a/etl/solar/app.py
+++ b/etl/solar/app.py
@ -0,0 +1,31 @@
+from pathlib import Path
+from etl.epc.property_change_app import get_cleaned
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    Read the EPC data and attempt to produce a reasonable figure for the photo-supply variable, which is
+    defined as:
+    "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply
+    is not present in the property."
+
+    When recommending solar, the retrofit is simulated by increasing this value from 0, so a sensible figure to
+    increase it to is needed. This script builds and stores the data used to deduce that figure.
+    :return:
+    """
+
+    # Every sub directory of the data directory is processed
+    certificate_directories = [path for path in DATA_DIRECTORY.iterdir() if path.is_dir()]
+
+    photo_supply_builder = SolarPhotoSupply(
+        file_directories=certificate_directories,
+        cleaned_lookup=get_cleaned(),
+    )
+    photo_supply_builder.create_dataset()
+    photo_supply_builder.save()
--- a/etl/solar/tests/test_solar_photo_supply.py
+++ b/etl/solar/tests/test_solar_photo_supply.py
@ -0,0 +1,109 @@
+import unittest
+import pandas as pd
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
+
+class TestSolarPhotoSupply(unittest.TestCase):
+
+    def setUp(self):
+        """Create a two row photo supply lookup, two decile thresholds and a client without any input data."""
+        lookup_columns = {
+            "tenure": ["leasehold", "freehold"],
+            "built_form": ["detached", "semi-detached"],
+            "property_type": ["house", "flat"],
+            "construction_age_band": ["pre-1900", "1900-1929"],
+            "is_flat": [False, True],
+            "is_pitched": [True, False],
+            "is_roof_room": [False, True],
+            "floor_area_decile": [0, 1],
+            "photo_supply": [100, 200]
+        }
+        threshold_columns = {"floor_area_decile_thresholds": [50, 100]}
+
+        self.photo_supply_lookup = pd.DataFrame(lookup_columns)
+        self.floor_area_decile_thresholds = pd.DataFrame(threshold_columns)
+        self.solar_photo_supply = SolarPhotoSupply([], {})
+
+    def test_correct_filtering(self):
+        """A property that matches the first lookup row on every attribute gets exactly that row back."""
+        matched = self.solar_photo_supply.filter_photo_supply_lookup(
+            photo_supply_lookup=self.photo_supply_lookup,
+            floor_area_decile_thresholds=self.floor_area_decile_thresholds,
+            tenure="leasehold",
+            built_form="detached",
+            property_type="house",
+            construction_age_band="pre-1900",
+            is_flat=False,
+            is_pitched=True,
+            is_roof_room=False,
+            floor_area=45,
+        )
+        self.assertEqual(len(matched), 1)
+        self.assertEqual(matched.iloc[0]["photo_supply"], 100)
+
+    def test_no_matches(self):
+        """A built form that is absent from the lookup leaves nothing to fall back on, so a ValueError is raised."""
+        query = dict(
+            photo_supply_lookup=self.photo_supply_lookup,
+            floor_area_decile_thresholds=self.floor_area_decile_thresholds,
+            tenure="leasehold",
+            built_form="unknown",
+            property_type="house",
+            construction_age_band="pre-1900",
+            is_flat=False,
+            is_pitched=True,
+            is_roof_room=False,
+            floor_area=45,
+        )
+        with self.assertRaises(ValueError):
+            self.solar_photo_supply.filter_photo_supply_lookup(**query)
+
+    def test_floor_area_decile_matching(self):
+        """A floor area of 60 falls into decile 1 (thresholds 50 and 100), which is the second lookup row."""
+        matched = self.solar_photo_supply.filter_photo_supply_lookup(
+            photo_supply_lookup=self.photo_supply_lookup,
+            floor_area_decile_thresholds=self.floor_area_decile_thresholds,
+            tenure="freehold",
+            built_form="semi-detached",
+            property_type="flat",
+            construction_age_band="1900-1929",
+            is_flat=True,
+            is_pitched=False,
+            is_roof_room=True,
+            floor_area=60,
+        )
+        self.assertEqual(len(matched), 1)
+        self.assertEqual(matched.iloc[0]["photo_supply"], 200)
+
+    def test_invalid_parameters(self):
+        """A tenure that is not a string cannot be lower cased, which surfaces as an AttributeError."""
+        query = dict(
+            photo_supply_lookup=self.photo_supply_lookup,
+            floor_area_decile_thresholds=self.floor_area_decile_thresholds,
+            tenure=123,  # Invalid type for tenure
+            built_form="detached",
+            property_type="house",
+            construction_age_band="pre-1900",
+            is_flat=False,
+            is_pitched=True,
+            is_roof_room=False,
+            floor_area=45,
+        )
+        with self.assertRaises(AttributeError):
+            self.solar_photo_supply.filter_photo_supply_lookup(**query)
+
+    def test_classify_floor_area(self):
+        """Floor areas are classified by the first threshold they do not exceed, or past the end if none."""
+        thresholds = [10, 20, 30, 40, 50]
+        classifier = SolarPhotoSupply([], {})
+
+        # (floor area, expected decile, message shown when the assertion fails)
+        cases = [
+            # Valid floor area
+            (25, 2, "Decile classification did not match expected result"),
+            # Out of range floor area
+            (60, len(thresholds), "Decile classification for out of range value is incorrect"),
+        ]
+
+        for floor_area, expected_decile, failure_message in cases:
+            result = classifier.classify_floor_area(floor_area, thresholds)
+            self.assertEqual(result, expected_decile, failure_message)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/etl/testing_data/estimate_epc.py
+++ b/etl/testing_data/estimate_epc.py
@ -0,0 +1,194 @@
+from pathlib import Path
+from random import choices, sample
+
+import os
+import pandas as pd
+from tqdm import tqdm
+from dotenv import load_dotenv
+from utils.logger import setup_logger
+from backend.SearchEpc import SearchEpc, vartypes
+from BaseUtility import Definitions
+from etl.epc.settings import BUILT_FORM_REMAP
+
+# NOTE(review): this resolves to <directory of this file>/backend/.env - confirm that is where the .env file lives
+ENV_FILE = Path(__file__).parent / "backend" / ".env"
+
+logger = setup_logger()
+
+# The .env file must be loaded before any environment variable is read, otherwise a value that is only defined in
+# the .env file (such as EPC_AUTH_TOKEN) would be read as None
+load_dotenv(ENV_FILE)
+
+# Directory containing one sub-directory per area, each holding a certificates.csv file
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+# Number of UPRNs tested per directory
+DIR_SAMPLE_SIZE = 500
+# Number of directories tested
+N_DIRECTORIES = 50
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+# String fields that are left out of the categorical comparison between an estimated and an actual EPC
+CATETORICALS_TO_IGNORE = [
+    "postcode", "constituency", "local-authority", "built-form", "property-type", "address1", "constituency-label",
+    "building-reference-number", "address2", "posttown", "transaction-type", "lmk-key", "address3",
+    "local-authority-label", "county",
+]
+
+
+def check_numeric_performance(estimated_value, actual_value):
+    """
+    Calculates the relative error of an estimated numeric value against the actual value.
+
+    :param estimated_value: The value produced by the estimation process
+    :param actual_value: The value we compare against
+    :return: None if there is no actual value to compare against. 1 if the estimate is missing, or if the actual
+        value is zero and the estimate is not. 0 if both values are zero. Otherwise the absolute difference
+        divided by the magnitude of the actual value
+    """
+    # If we don't have anything to compare against, return None
+    if pd.isnull(actual_value):
+        return None
+
+    # A missing estimate is treated as a complete miss
+    if pd.isnull(estimated_value):
+        return 1
+
+    # We can't divide by zero, so a zero actual value is either a perfect match or a complete miss
+    if actual_value == 0:
+        return 0 if estimated_value == 0 else 1
+
+    # Divide by the magnitude of the actual value, so that a negative actual value cannot produce a negative error
+    return abs(estimated_value - actual_value) / abs(actual_value)
+
+
+def app():
+    """
+    This script is used to test the EPC estimation process.
+
+    A random sample of N_DIRECTORIES directories is taken from DATA_DIRECTORY. Within each directory, the newest
+    certificate for each of the first DIR_SAMPLE_SIZE UPRNs (in sorted order) is estimated with
+    SearchEpc.estimate_epc, with every certificate for that UPRN excluded through lmks_to_drop. Each estimate is
+    scored against the real certificate:
+    - numeric_success is 1 minus the mean relative error over the numeric fields that have an actual value
+    - categorical_success is the share of string fields where the estimate matches exactly
+
+    The median scores are logged overall and grouped by tenure, property type and property type & built form.
+
+    :raises ValueError: If vartypes contains a type other than "float", "Int64" or "str"
+    """
+
+    numerical_vartypes = {key: value for key, value in vartypes.items() if value in ["float", "Int64"]}
+    str_var_types = {key: value for key, value in vartypes.items() if value == "str"}
+    # Make sure we haven't missed any keys
+    if len(numerical_vartypes) + len(str_var_types) != len(vartypes):
+        raise ValueError("Not all vartypes have been accounted for")
+
+    # Drop some keys that aren't important
+    for k in CATETORICALS_TO_IGNORE:
+        str_var_types.pop(k, None)
+
+    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
+
+    # Sample without replacement, so that the same directory (and therefore the same properties) can't be scored
+    # more than once
+    directory_sample = sample(directories, k=min(N_DIRECTORIES, len(directories)))
+
+    results = []
+
+    for directory in tqdm(directory_sample):
+        filepath = directory / "certificates.csv"
+        df = pd.read_csv(filepath, low_memory=False)
+        # Records without a UPRN have to be dropped before casting to string. Once cast, a missing value becomes
+        # the string "<NA>" and can no longer be detected as null
+        df = df[~pd.isnull(df["UPRN"])].copy()
+        df["UPRN"] = df["UPRN"].astype("Int64").astype("str")
+
+        # uprn_sample = sample(df["UPRN"].unique().tolist(), DIR_SAMPLE_SIZE)
+        # Take a fixed sample based on the first DIR_SAMPLE_SIZE uprns
+        uprn_sample = sorted(df["UPRN"].unique().tolist())[:DIR_SAMPLE_SIZE]
+        df_sample = df[df["UPRN"].isin(uprn_sample)]
+        # Take the record with the newest LODGEMENT_DATETIME by uprn
+        df_sample = df_sample.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+        # Convert the columns to lower case and replace underscores with hyphens, the same as the api
+        df_sample.columns = df_sample.columns.str.lower().str.replace("_", "-")
+
+        # For each epc, we test the estimation process
+        for _, epc in df_sample.iterrows():
+            epc = epc.to_dict()
+            address1 = epc["address1"]
+            postcode = epc["postcode"]
+
+            # Get all EPCs for this uprn and we make sure they get dropped from the estimate_epc function
+            epcs_for_uprn = df[df["UPRN"] == epc["uprn"]]
+            lmks_to_drop = epcs_for_uprn["LMK_KEY"].tolist()
+            searcher = SearchEpc(address1, postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="")
+            searcher.uprn = epc["uprn"]
+
+            # Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
+            # Enclosed End-Terrace
+            built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
+            is_detached_maisonette = epc["property-type"] == "Maisonette" and built_form == "Detached"
+            if is_detached_maisonette or built_form in Definitions.DATA_ANOMALY_MATCHES:
+                built_form = ""
+
+            estimated_epc = searcher.estimate_epc(
+                property_type=epc["property-type"], built_form=built_form, lmks_to_drop=lmks_to_drop
+            )
+
+            # We now compare the difference between the estimated and original
+            # TODO: We can convert windows and lighting to numeric versions and estimate how close we are
+            numeric_performance = {
+                key: check_numeric_performance(estimated_epc[key], epc[key]) for key in numerical_vartypes
+            }
+
+            # Remove Nones
+            numeric_performance = {key: value for key, value in numeric_performance.items() if value is not None}
+            # Get an average. If there was nothing to compare against, we record a missing value, which is
+            # skipped by the medians calculated below
+            if numeric_performance:
+                numeric_success = 1 - sum(numeric_performance.values()) / len(numeric_performance)
+            else:
+                numeric_success = float("nan")
+
+            # categorical performance
+            categorical_performance = {
+                key: 0 if estimated_epc[key] != epc[key] else 1 for key in str_var_types
+            }
+            # Get an average, with the same handling when there is nothing to compare
+            if categorical_performance:
+                categorical_success = sum(categorical_performance.values()) / len(categorical_performance)
+            else:
+                categorical_success = float("nan")
+
+            results.append(
+                {
+                    "uprn": epc["uprn"],
+                    "numeric_success": numeric_success,
+                    "categorical_success": categorical_success,
+                    "property_type": epc["property-type"],
+                    "built_form": epc["built-form"],
+                    "tenure": epc["tenure"],
+                }
+            )
+
+    # Without any results there are no columns to aggregate on
+    if not results:
+        logger.warning("No EPCs were tested, so there is no performance to report")
+        return
+
+    # Get aggregate performance figures
+    results_df = pd.DataFrame(results)
+    results_df["tenure"] = results_df["tenure"].replace("Rented (social)", "rental (social)")
+
+    avg_numeric_success = results_df["numeric_success"].median()
+    avg_categorical_success = results_df["categorical_success"].median()
+
+    # With 20 nearest homes
+    # 0.7718100840549558
+    # 0.5116279069767442
+    # 100 nearest homes
+    # 0.7859617377809409
+    # 0.5348837209302325
+
+    # Fixed sample, sqrt weights
+
+    # Group by tenure
+    by_tenure = results_df.groupby("tenure").agg(
+        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
+    )
+
+    # With 20 nearest homes
+    #                                                     numeric_success  categorical_success   uprn
+    # tenure
+    # NO DATA!                                                   0.847840             0.581395    278
+    # Not defined - use in the case of a new dwelling...         0.930282             0.651163    617
+    # Owner-occupied                                             0.770330             0.511628   2588
+    # Rented (private)                                           0.791885             0.558140   1232
+    # owner-occupied                                             0.741088             0.488372  10912
+    # rental (private)                                           0.749064             0.488372   3252
+    # rental (social)                                            0.822109             0.581395   3878
+    # unknown                                                    0.895840             0.627907   1820
+
+    # 100 nearest homes
+    # tenure
+    # NO DATA!                                                   0.899566             0.604651    233
+    # Not defined - use in the case of a new dwelling...         0.927518             0.674419    608
+    # Owner-occupied                                             0.777026             0.511628   3167
+    # Rented (private)                                           0.805646             0.534884   1316
+    # owner-occupied                                             0.762180             0.488372  10835
+    # rental (private)                                           0.760503             0.511628   3181
+    # rental (social)                                            0.830057             0.604651   3705
+    # unknown                                                    0.899948             0.627907   1571
+
+    # By property type - we also want to see how many properties we have for each property type
+    by_property_type = results_df.groupby("property_type").agg(
+        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
+    )
+    # By property_type & built form
+    by_property_type_built_form = results_df.groupby(["property_type", "built_form"]).agg(
+        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
+    )
+
+    # Widen the pandas display limits so that the grouped tables are reported in full
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+
+    # Report the results, which would otherwise be discarded when the function returns
+    logger.info("Median numeric success: %s", avg_numeric_success)
+    logger.info("Median categorical success: %s", avg_categorical_success)
+    logger.info("Performance by tenure:\n%s", by_tenure)
+    logger.info("Performance by property type:\n%s", by_property_type)
+    logger.info("Performance by property type and built form:\n%s", by_property_type_built_form)
--- a/etl/testing_data/no_epc_input.py
+++ b/etl/testing_data/no_epc_input.py
@ -0,0 +1,42 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+# Both ids form the S3 key prefix of the uploaded csv; PORTFOLIO_ID is also placed in the printed request body
+USER_ID = 8
+PORTFOLIO_ID = 57
+
+
+def app():
+    """
+    Builds a four-row test portfolio (21 to 24 Butler House, E2 0PN), uploads it to S3 as a csv and prints the
+    request body that points at the uploaded file.
+
+    NOTE(review): going by the uploaded file name (no_epc.csv), this portfolio is presumably for testing
+    properties that have no EPC, rather than windows recommendations - confirm
+    :return:
+    """
+
+    portfolio_rows = [
+        {"address": f"{house_number} Butler House", "postcode": "E2 0PN", "Notes": None}
+        for house_number in range(21, 25)
+    ]
+    test_file = pd.DataFrame(portfolio_rows)
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/no_epc.csv"
+    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=filename)
+
+    # The body is only printed here, it is not sent anywhere
+    body = dict(
+        portfolio_id=str(PORTFOLIO_ID),
+        housing_type="Social",
+        goal="Increase EPC",
+        goal_value="A",
+        trigger_file_path=filename,
+    )
+    print(body)
--- a/etl/testing_data/windows_portfolio.py
+++ b/etl/testing_data/windows_portfolio.py
@ -0,0 +1,43 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+# Both ids form the S3 key prefix of the uploaded csv; PORTFOLIO_ID is also placed in the printed request body
+USER_ID = 8
+PORTFOLIO_ID = 56
+
+
+def app():
+    """
+    This portfolio is for testing windows recommendations. It uploads a five-property input csv to S3 and prints
+    the request body that points at the uploaded file.
+    :return:
+    """
+
+    # (address, postcode) for each property in the portfolio
+    properties = [
+        ("3 Church Terrace", "LE13 0PW"),
+        ("3, Main Street, Redmile", "NG13 0GA"),
+        ("Manor House, Kennel Lane, Reepham", "LN3 4DZ"),
+        ("13 Main Street", "LE14 2JU"),
+        ("8 The Crescent, Coston Road, Buckminster", "NG33 5SF"),
+    ]
+    test_file = pd.DataFrame(
+        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in properties]
+    )
+
+    # Store the data in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/windows_portfolio_inputs.csv"
+    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=filename)
+
+    # The body is only printed here, it is not sent anywhere
+    body = dict(
+        portfolio_id=str(PORTFOLIO_ID),
+        housing_type="Social",
+        goal="Increase EPC",
+        goal_value="A",
+        trigger_file_path=filename,
+    )
+    print(body)
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@ -18,6 +18,25 @@ regional_labour_variations = [
    {"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
 ]

+# This data is based on the MCS database
+# Regional figures are keyed as "average_cost_per_kwh-<region>", which is the key that Costs.solar_pv builds from
+# the region of the property. The key without a region suffix is presumably the average across all regions.
+# NOTE(review): despite the "per_kwh" naming, Costs.solar_pv multiplies these figures by the system size in kW
+# (wattage / 1000), so they look like costs per kW of capacity - confirm the units against the MCS dashboard
+MCS_SOLAR_PV_COST_DATA = {
+    "last_updated": "2024-01-04",
+    "average_cost_per_kwh": 2013.94,
+    "average_cost_per_kwh-Outer London": 2618.75,
+    "average_cost_per_kwh-Inner London": 2618.75,
+    "average_cost_per_kwh-South East England": 2083.33,
+    "average_cost_per_kwh-South West England": 2113,
+    "average_cost_per_kwh-East of England": 1973.86,
+    "average_cost_per_kwh-East Midlands": 1981.86,
+    "average_cost_per_kwh-West Midlands": 1926.55,
+    "average_cost_per_kwh-North East England": 2028.49,
+    "average_cost_per_kwh-North West England": 1620.42,
+    "average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
+    "average_cost_per_kwh-Wales": 1898.83,
+    "average_cost_per_kwh-Scotland": 1967.97,
+    "average_cost_per_kwh-Northern Ireland": 2126.09,
+}
+

 class Costs:
    """
@ -42,7 +61,7 @@ class Costs:

    # We use a higher contingency rate for internal wall insulation because of the potential for issues with moving
    # fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
-    IWI_CONTINGENCY = 0.15
+    IWI_CONTINGENCY = 0.2

    # Where there is more uncertainty, a higher contingency rate is used
    HIGH_RISK_CONTINGENCY = 0.2
@ -58,12 +77,22 @@ class Costs:
    # have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate.
    # For External wall insulation (EWI), we use 15% as the preliminaries rate if we think the property might
    # need scaffolding, otherwise we use 12%. This is to account for any site preparation that might be required
-    EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.15
-    EWI_SCAFFOLDING_PRELIMINARIES = 0.20
+    EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.2
+    EWI_SCAFFOLDING_PRELIMINARIES = 0.25

    VAT_RATE = 0.2
    PROFIT_MARGIN = 0.2

+    # Based on this greenmatch article, on average, a Sash window is around 50% more expensive than a casement window.
+    # Therefore, for a conservative cost estimate, and allowance for a more premium window type, we inflate the material
+    # cost of the windows to allow for a sash window type
+    # https://www.greenmatch.co.uk/windows/double-glazing/cost
+    SASH_WINDOW_INFLATION_FACTOR = 1.5
+
+    # Typically, secondary glazing can be installed for 25% of the cost of double glazed windows - to be conservative,
+    # we scale the cost by half
+    SECONDARY_GLAZING_SCALING_FACTOR = 0.5
+
    def __init__(self, property_instance):
        """
        Initializes the Costs class with a property instance.
@ -147,12 +176,16 @@ class Costs:
        """
        material_cost_per_m2 = material["material_cost"]

+        # We inflate material costs due to recent price increases
+        material_cost_per_m2 = material_cost_per_m2 * 1.5
+
        base_material_cost = material_cost_per_m2 * floor_area
        labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor

        subtotal_before_profit = base_material_cost + labour_cost

-        contingency_cost = subtotal_before_profit * self.CONTINGENCY
+        # We use high risk contingency because of the possibility of access issues and clearing existing insulation
+        contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
        preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
        profit_cost = subtotal_before_profit * self.PROFIT_MARGIN

@ -719,3 +752,121 @@ class Costs:
            "labour_days": labour_days,
            "labour_cost": labour_costs
        }
+
+    def window_glazing(self, number_of_windows, material, is_secondary_glazing=False):
+        """
+        Estimates the cost and labour required to glaze the given number of windows.
+
+        The job is characterised as the following steps:
+        1) Initial Assessment and Measurements: The condition of the window frame and opening is assessed before
+        the existing window is removed, and precise measurements are taken so that the new double glazed windows
+        fit perfectly.
+
+        2) Remove the Existing Window: The old single glazed window is carefully dismantled and removed, without
+        damaging the surrounding wall or the window frame (if it's to be reused).
+
+        3) Dispose of the Existing Window: The old window is disposed of responsibly, with glass and other materials
+        recycled where possible.
+
+        4) Surface Preparation: The opening is prepared where there's damage or where adjustments are needed for
+        the new window. This can include repairing or replacing parts of the frame, sealing gaps and making sure the
+        opening is level and square.
+
+        5) Install the Window Frame (if new frames are used): Double glazed windows often come with their own
+        frames, which are aligned, levelled and fixed securely into the opening.
+
+        6) Install the Window Sill: If a new sill is required, it is aligned with the frame and securely attached.
+
+        7) Install the Double Glazed Glass Units: The glass units are inserted into the frame, with a snug fit that
+        doesn't put stress on the glass, which could lead to cracking or breaking.
+
+        8) Sealing and Weatherproofing: The frame, and the join between glass and frame, are sealed (typically with
+        silicone sealant or similar) so that there are no drafts and the installation is weather-tight.
+
+        9) Finishing Touches: Cosmetic work such as trimming, painting or staining the frame and sill to match the
+        rest of the property, and cleaning up any mess created during the installation.
+
+        10) Inspection and Testing: The new windows are checked to make sure they open, close and lock correctly,
+        and that there are no gaps or issues with the sealing.
+
+        For this cost estimation process, we factor the initial assessment into the preliminaries
+
+        :param number_of_windows: Number of windows to be glazed
+        :param material: Dictionary with the per-window "material_cost", "labour_cost" and "labour_hours_per_unit"
+        :param is_secondary_glazing: If True, costs and labour hours are scaled by SECONDARY_GLAZING_SCALING_FACTOR,
+            otherwise costs are scaled by SASH_WINDOW_INFLATION_FACTOR
+        :return: Dictionary containing the cost breakdown and the labour hours and days
+        """
+
+        material_cost = material["material_cost"] * number_of_windows
+        labour_cost = material["labour_cost"] * number_of_windows * self.labour_adjustment_factor
+
+        if is_secondary_glazing:
+            multiplier = self.SECONDARY_GLAZING_SCALING_FACTOR
+        else:
+            multiplier = self.SASH_WINDOW_INFLATION_FACTOR
+
+        # The multiplier is applied to the combined material and labour cost
+        subtotal = (material_cost + labour_cost) * multiplier
+
+        # Contingency, preliminaries and profit are each a percentage of the scaled subtotal
+        contingency_cost, preliminaries_cost, profit_cost = (
+            subtotal * rate for rate in (self.CONTINGENCY, self.PRELIMINARIES, self.PROFIT_MARGIN)
+        )
+
+        subtotal_before_vat = subtotal + contingency_cost + preliminaries_cost + profit_cost
+        vat_cost = subtotal_before_vat * self.VAT_RATE
+
+        labour_hours = material["labour_hours_per_unit"] * number_of_windows
+        if is_secondary_glazing:
+            labour_hours = labour_hours * self.SECONDARY_GLAZING_SCALING_FACTOR
+
+        return {
+            "total": subtotal_before_vat + vat_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat_cost,
+            "contingency": contingency_cost,
+            "preliminaries": preliminaries_cost,
+            # The material and labour costs are reported before the multiplier is applied
+            "material": material_cost,
+            "profit": profit_cost,
+            "labour_hours": labour_hours,
+            "labour_cost": labour_cost,
+            # Assume a team of 2, working 8 hour days
+            "labour_days": (labour_hours / 8) / 2
+        }
+
+    def solar_pv(self, wattage: float):
+
+        """
+        Calculates the total cost for solar PV based data provided by the MCS dashboard, which contains
+        costing data for installations of renewable and clean energy measures.
+
+        The data in the dashboard is filtered on domestic building installations and then the data across the
+        various regions is manually collected. There is currently no automated way to get the data from the MCS
+        dashboard
+
+        Price can also be benchmarked against this checkatrade article:
+        https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/
+        :param wattage: Peak wattage of the solar PV system
+        :return: Dictionary with the total cost, the subtotal before VAT, the VAT and the labour hours and days.
+            The total is treated as VAT inclusive, so the subtotal and VAT are derived from it
+        """
+
+        # Get the cost data relevant to the region. If the region is unknown, or is not covered by the MCS data,
+        # we fall back to the overall average rather than failing with a KeyError
+        regional_cost = MCS_SOLAR_PV_COST_DATA.get(
+            f"average_cost_per_kwh-{self.region}", MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"]
+        )
+
+        kw = wattage / 1000
+        total_cost = kw * regional_cost
+
+        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+        vat = total_cost - subtotal_before_vat
+
+        # Labour hours are based on estimates from online research but an average team seems to consist of 3 people
+        # and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 48 hours of
+        # labour
+        team_size = 3
+        hours_per_day = 8
+        labour_days = 2
+        labour_hours = team_size * hours_per_day * labour_days
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": labour_days,
+        }
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@ -6,6 +6,8 @@ from recommendations.RoofRecommendations import RoofRecommendations
 from recommendations.VentilationRecommendations import VentilationRecommendations
 from recommendations.FireplaceRecommendations import FireplaceRecommendations
 from recommendations.LightingRecommendations import LightingRecommendations
+from recommendations.SolarPvRecommendations import SolarPvRecommendations
+from recommendations.WindowsRecommendations import WindowsRecommendations
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings


@ -35,6 +37,8 @@ class Recommendations:
        )
        self.fireplace_recommender = FireplaceRecommendations(property_instance=property_instance)
        self.lighting_recommender = LightingRecommendations(property_instance=property_instance, materials=materials)
+        self.windows_recommender = WindowsRecommendations(property_instance=property_instance, materials=materials)
+        self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)

    def recommend(self):

@ -77,6 +81,16 @@ class Recommendations:
        if self.lighting_recommender.recommendation:
            property_recommendations.append(self.lighting_recommender.recommendation)

+        # Windows recommendations
+        self.windows_recommender.recommend()
+        if self.windows_recommender.recommendation:
+            property_recommendations.append(self.windows_recommender.recommendation)
+
+        # Solar recommendations
+        self.solar_recommender.recommend()
+        if self.solar_recommender.recommendation:
+            property_recommendations.append(self.solar_recommender.recommendation)
+
        # We insert temporary ids into the recommendations which is important for the optimiser later
        property_recommendations = self.insert_temp_recommendation_id(property_recommendations)

@ -148,6 +162,8 @@ class Recommendations:
                    # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
                    rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT)

+                # Round to 2 decimal places
+                rec["sap_points"] = round(rec["sap_points"], 2)
                rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon

                # Energy consumption current is per meter squared, so we need to multiply by the floor area to get
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@ -0,0 +1,65 @@
+import numpy as np
+from recommendations.Costs import Costs
+
+
+class SolarPvRecommendations:
+    """
+    Recommends a roof mounted solar PV system for a property that is potentially suitable for one.
+    """
+    # Approximate area of the solar panels
+    SOLAR_PANEL_AREA = 1.6
+    # Wattage per panel
+    SOLAR_PANEL_WATTAGE = 360
+
+    def __init__(self, property_instance):
+        """
+        :param property_instance: Instance of the Property class, for the home associated to property_id
+        """
+
+        self.property = property_instance
+        self.costs = Costs(self.property)
+
+        self.recommendation = []
+
+    def _has_no_existing_solar_pv(self):
+        """
+        Checks whether the photo-supply field indicates that there is no existing solar PV.
+
+        :return: True if photo-supply is None, a missing value, numerically zero (including a numeric string such
+            as "0") or one of the values in DATA_ANOMALY_MATCHES, otherwise False
+        """
+        photo_supply = self.property.data["photo-supply"]
+        if photo_supply is None:
+            return True
+
+        try:
+            numeric_photo_supply = float(photo_supply)
+        except (TypeError, ValueError):
+            # Not a number, so there is only no solar PV if this is one of the known anomaly values. Each
+            # anomaly value is compared individually, rather than comparing against the collection as a whole
+            return photo_supply in self.property.DATA_ANOMALY_MATCHES
+
+        return bool(numeric_photo_supply == 0 or np.isnan(numeric_photo_supply))
+
+    def recommend(self):
+        """
+        We check if a property is potentially suitable for solar PV based on the following criteria:
+        - The property is a house or bungalow
+        - The property has a flat roof, a pitched roof or a roof room
+        - The property does not have existing solar pv
+        - The roof area available for solar PV is large enough for at least one panel
+
+        If the property is suitable, self.recommendation is set to a list containing a single recommendation
+        :return:
+        """
+
+        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+        is_valid_roof_type = (
+            self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
+        )
+        # If there is no existing solar PV, the photo-supply field will be None or a missing value
+        has_no_existing_solar_pv = self._has_no_existing_solar_pv()
+
+        if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
+            return
+
+        # We now have a property which is potentially suitable for solar PV
+        number_solar_panels = np.floor(self.property.solar_pv_roof_area / self.SOLAR_PANEL_AREA)
+        if number_solar_panels < 1:
+            # The roof can't fit a single panel, so there is nothing to recommend
+            return
+
+        solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
+
+        # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
+        # of solar PV installations
+        cost_result = self.costs.solar_pv(wattage=solar_panel_wattage)
+
+        # The description quotes the system size rounded to the nearest whole kilowatt
+        kw = int(np.round(solar_panel_wattage / 1000))
+
+        self.recommendation = [
+            {
+                "parts": [],
+                "type": "solar_pv",
+                "description": f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof",
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                **cost_result,
+                # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
+                # back up here
+                "photo_supply": 100 * self.property.solar_pv_percentage
+            }
+        ]
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@ -0,0 +1,97 @@
+from typing import List
+
+import numpy as np
+
+from backend.Property import Property
+from recommendations.Costs import Costs
+
+
+class WindowsRecommendations:
+    """
+    Recommends double glazing, or secondary glazing, for the windows of a property that are not yet glazed.
+    """
+    # If the property has existing glazing, we scale down the number of windows that need to be glazed
+    COVERAGE_MAP = {
+        # If most of the windows have already been glazed, we assume that 2/3 are glazed and 1/3 are remaining to be
+        # glazed
+        "most": 0.33,
+        # If glazing is partial, we assume 50/50 split between glazed and unglazed
+        "partial": 0.5
+    }
+
+    def __init__(self, property_instance: Property, materials: List):
+        """
+        :param property_instance: Instance of the Property class that recommendations are produced for
+        :param materials: List of material dictionaries, which must contain exactly one material with the type
+            "windows_glazing"
+        :raises ValueError: If there isn't exactly one window glazing material
+        """
+        self.property = property_instance
+        self.costs = Costs(self.property)
+
+        self.recommendation = []
+
+        self.glazing_material = [
+            material for material in materials if material["type"] == "windows_glazing"
+        ]
+
+        if len(self.glazing_material) != 1:
+            raise ValueError("There should only be one window glazing material")
+        self.glazing_material = self.glazing_material[0]
+
+    def _remaining_windows_scalar(self):
+        """
+        Estimates the proportion of windows (between 0 and 1) that still need to be glazed.
+
+        :return: 1 minus the multi-glaze-proportion percentage when it is a usable number, otherwise the
+            COVERAGE_MAP value for the glazing coverage, defaulting to 1 (all windows)
+        """
+        try:
+            glazed_percentage = float(self.property.data["multi-glaze-proportion"])
+        except (TypeError, ValueError):
+            # An empty string, None or an anomaly value such as a text placeholder
+            glazed_percentage = None
+
+        if glazed_percentage is None or np.isnan(glazed_percentage):
+            return self.COVERAGE_MAP.get(self.property.windows["glazing_coverage"], 1)
+
+        # Clamp the result, in case the recorded percentage falls outside of 0-100
+        return min(max(1 - glazed_percentage / 100, 0), 1)
+
+    def recommend(self):
+        """
+        This method will recommend the best possible glazing options for a property.
+
+        In order to do this, we need to estimate the number of windows that the home has. This information will be
+        stored in the property object, under property.number_of_windows
+
+        No recommendation is made if the property is already fully glazed, or if there are no windows remaining
+        to be glazed. Otherwise, self.recommendation is set to a list containing a single recommendation
+        :raises ValueError: If the number of windows has not been specified
+        :return:
+        """
+
+        # If the property is in a conservation area or is a listed building, it becomes more difficult to install
+        # double glazing. It is still possible but is not practical as it requires planning permission and might
+        # require a more expensive window type, such as timber. Therefore, where measures are restricted (or the
+        # existing glazing is already secondary), we recommend secondary glazing instead.
+
+        number_of_windows = self.property.number_of_windows
+        is_secondary_glazing = self.property.restricted_measures or (
+            self.property.windows["glazing_type"] == "secondary"
+        )
+
+        if not number_of_windows:
+            raise ValueError("Number of windows not specified")
+
+        if self.property.windows["has_glazing"] and self.property.windows["glazing_coverage"] == "full":
+            return
+
+        # We scale the number of windows based on the proportion of existing glazing
+        number_of_windows = np.ceil(number_of_windows * self._remaining_windows_scalar())
+
+        if number_of_windows <= 0:
+            # Every window is already glazed, so there is no job to price
+            return
+
+        # We then price the job based on the number of windows that there are
+        cost_result = self.costs.window_glazing(
+            number_of_windows=number_of_windows,
+            material=self.glazing_material,
+            is_secondary_glazing=is_secondary_glazing
+        )
+
+        glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
+        if self.property.windows["glazing_coverage"] in ["partial", "most"]:
+            description = f"Install {glazing_type} to the remaining windows"
+        else:
+            description = f"Install {glazing_type} to all windows"
+
+        # We only explain why secondary glazing was chosen when it is what we are actually recommending
+        if is_secondary_glazing:
+            if self.property.is_listed:
+                description += ". Secondary glazing recommended due to listed building status"
+            elif self.property.is_heritage:
+                description += ". Secondary glazing recommended due to heritage building status"
+            elif self.property.in_conservation_area:
+                description += ". Secondary glazing recommended due to conservation area status"
+
+        self.recommendation = [
+            {
+                "parts": [],
+                "type": "windows_glazing",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                **cost_result,
+                "is_secondary_glazing": is_secondary_glazing
+            }
+        ]
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@ -1,4 +1,5 @@
 import math
+from datetime import datetime
 from copy import deepcopy
 from typing import Union

@ -565,7 +566,7 @@ def estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
        'Detached': 4,
    }

-    exposed_wall_area = total_wall_area * (number_exposed_walls[built_form] / 4)
+    exposed_wall_area = total_wall_area * (number_exposed_walls.get(built_form, 3) / 4)

    return exposed_wall_area

@ -669,3 +670,87 @@ def esimtate_pitched_roof_area(floor_area: float, floor_height: float) -> float:
    area = 2 * (slope * wall_width)

    return area
+
+
+def estimate_windows(
+    property_type, built_form, construction_age_band, floor_area, number_habitable_rooms, extension_count
+):
+    """
+    Heuristically estimate the number of windows in a property from its EPC-style attributes.
+
+    :param property_type: Property type, e.g. "House", "Bungalow", "Flat" or "Maisonette"
+    :param built_form: Built form, e.g. "Mid-Terrace" or "Detached"; unrecognised values add no windows
+    :param construction_age_band: Construction age band label, e.g. "England and Wales: 1900-1929"
+    :param floor_area: Floor area of the property (presumably in square metres - confirm with callers)
+    :param number_habitable_rooms: Number of habitable rooms, each assumed to have one window
+    :param extension_count: Number of extensions, each assumed to add one window
+    :return: The estimated window count
+    :raises ValueError: If the estimated window count is negative
+    """
+    # One window per habitable room
+    window_count = number_habitable_rooms
+    # Non-habitable rooms: a kitchen and a bathroom, plus one more for every two habitable rooms past three
+    window_count += 2 + max(0, (number_habitable_rooms - 3) // 2)
+
+    # Houses and bungalows with more exposed sides gain the most windows from their built form
+    if property_type in ["House", "Bungalow"] and built_form in ["Semi-Detached", "Detached"]:
+        built_form_lookup = {
+            "Semi-Detached": 3,
+            "Detached": 4,
+        }
+    else:
+        # Every other combination (e.g. flats, maisonettes, terraces) gets a smaller adjustment
+        built_form_lookup = {
+            "Mid-Terrace": 0,
+            "End-Terrace": 1,
+            "Semi-Detached": 1,
+            "Detached": 2,
+        }
+    window_count += built_form_lookup.get(built_form, 0)
+
+    # Larger floor areas suggest more or bigger rooms: +1 from 85 up to 120, +2 above 120
+    if floor_area > 120:
+        window_count += 2
+    elif floor_area >= 85:
+        window_count += 1
+
+    # Older houses tend to have smaller, more numerous windows
+    if construction_age_band in ["England and Wales: before 1900", "England and Wales: 1900-1929"]:
+        window_count += 1
+
+    # Each extension might add a window
+    window_count += extension_count
+
+    # Flats and maisonettes lose a window because of their shared walls.
+    # "Maisonette" must be spelled in full here, otherwise maisonettes silently skip this adjustment.
+    if property_type in ["Flat", "Maisonette"]:
+        window_count -= 1
+
+    # Guard against a negative estimate
+    if window_count < 0:
+        raise ValueError("Window count cannot be negative.")
+
+    return window_count
+
+
+def calculate_cavity_age(newest_epc, older_epcs, cleaned):
+    """
+    Return the age, in days, of the most recent EPC that records a filled cavity wall.
+
+    EPCs whose "walls-description" has no entry in cleaned["walls-description"] are ignored.
+    NaN is returned when no EPC is mapped to a wall that is both a cavity wall and filled.
+    """
+    wall_mappings = cleaned["walls-description"]
+
+    filled_cavity_dates = []
+    for epc in [newest_epc] + older_epcs:
+        # Use the first cleaned entry whose original description matches this EPC
+        mapped = next((m for m in wall_mappings if m["original_description"] == epc["walls-description"]), None)
+        if mapped is not None and mapped["is_cavity_wall"] and mapped["is_filled_cavity"]:
+            filled_cavity_dates.append(epc["lodgement-date"])
+
+    if not filled_cavity_dates:
+        # No EPC records a filled cavity, so there is no fill date to measure from
+        return float("nan")
+
+    return (datetime.now() - pd.to_datetime(max(filled_cavity_dates))).days
--- a/recommendations/tests/test_costs.py
+++ b/recommendations/tests/test_costs.py
@ -1,6 +1,7 @@
 from recommendations.Costs import Costs
 from unittest.mock import Mock
 import datetime
+import pytest


 class TestCosts:
@ -58,9 +59,9 @@ class TestCosts:
        )

        assert loft_results == {
-            'total': 430.21445040000003, 'subtotal': 358.512042, 'vat': 71.70240840000001,
-            'contingency': 25.608003000000004, 'preliminaries': 25.608003000000004, 'material': 198.29923000000002,
-            'profit': 51.21600600000001, 'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625
+            'total': 639.4133610000001, 'subtotal': 532.8444675000001, 'vat': 106.56889350000002,
+            'contingency': 71.045929, 'preliminaries': 35.5229645, 'material': 297.448845, 'profit': 71.045929,
+            'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625
        }

    def test_internal_wall_insulation(self):
@ -176,11 +177,9 @@ class TestCosts:
        )

        assert iwi_results == {
-            'total': 6650.889456921851, 'subtotal': 5542.407880768209, 'vat': 1108.4815761536418,
-            'contingency': 573.3525393898148, 'preliminaries': 382.2350262598765,
-            'material': 1747.488000615996,
-            'profit': 764.470052519753, 'labour_hours': 88.23759388401297,
-            'labour_days': 2.757424808875405,
+            'total': 6880.2304726777775, 'subtotal': 5733.525393898148, 'vat': 1146.7050787796295,
+            'contingency': 764.470052519753, 'preliminaries': 382.2350262598765, 'material': 1747.488000615996,
+            'profit': 764.470052519753, 'labour_hours': 88.23759388401297, 'labour_days': 2.757424808875405,
            'labour_cost': 1927.1602026551818
        }

@ -414,8 +413,8 @@ class TestCosts:
        )

        assert ewi_results == {
-            'total': 14561.688989159393, 'subtotal': 12134.740824299493, 'vat': 2426.948164859899,
-            'contingency': 808.9827216199662, 'preliminaries': 1617.9654432399325, 'material': 4020.565147410677,
+            'total': 15047.078622131372, 'subtotal': 12539.232185109477, 'vat': 2507.8464370218953,
+            'contingency': 808.9827216199662, 'preliminaries': 2022.4568040499155, 'material': 4020.565147410677,
            'profit': 1617.9654432399325, 'labour_hours': 187.02533486285358, 'labour_days': 5.8445417144641745,
            'labour_cost': 3921.5600094613983
        }
@ -499,3 +498,48 @@ class TestCosts:
                                           'labour_hours': 24.79, 'labour_days': 1.549375, 'labour_cost': 186.9032}

        assert costs.labour_adjustment_factor == 0.88
+
+    @pytest.fixture(params=[
+        ("Northamptonshire", "East Midlands", 7927.44),
+        ("Greater London Authority", "Inner London", 10475.0),
+        ("Adur", "South East England", 8333.32),
+        ("Bournemouth", "South West England", 8452),
+        ("Basildon", "East of England", 7895.44),
+        ("Birmingham", "West Midlands", 7706.2),
+        ("County Durham", "North East England", 8113.96),
+        ("Allerdale", "North West England", 6481.68),
+        ("York", "Yorkshire and the Humber", 8243.6),
+        ("Cardiff", "Wales", 7595.32),
+        ("Glasgow City", "Scotland", 7871.88),
+        ("Belfast", "Northern Ireland", 8504.36)
+    ])
+    def mock_property_with_region(self, request):
+        """Mock property for each regional case: (county, expected region, expected solar PV total)."""
+        county, region, expected_cost = request.param
+        mock_property = Mock()
+        mock_property.data = {"county": county}
+        return mock_property, region, expected_cost
+
+    @pytest.mark.parametrize(
+        "wattage, expected_cost",
+        [(3000, 5945.58), (4000, 7927.44), (5000, 9909.3), (6000, 11891.16)],
+    )
+    def test_solar_pv_different_wattages(self, wattage, expected_cost):
+        """The solar PV total for each wattage is within 1% of the expected cost."""
+        # The county is held at Mansfield so that only the wattage varies between cases
+        stub_property = Mock()
+        stub_property.data = {"county": "Mansfield"}
+
+        quote = Costs(stub_property).solar_pv(wattage)
+
+        assert quote['total'] == pytest.approx(expected_cost, rel=0.01)
+
+    def test_solar_pv_regional_variation(self, mock_property_with_region):
+        """Each county maps to its expected region and a 4000-wattage quote within 1% of the expected total."""
+        stub_property, region, regional_total = mock_property_with_region
+        calculator = Costs(stub_property)
+        assert calculator.region == region
+
+        # The wattage is held at 4000 so that only the region varies between cases
+        quote = calculator.solar_pv(4000)
+        assert quote['total'] == pytest.approx(regional_total, rel=0.01)
--- a/recommendations/tests/test_data/materials.py
+++ b/recommendations/tests/test_data/materials.py
@ -942,8 +942,24 @@ materials = [
             'https://www.hamuch.com/cost/led-spot-light#:~:text=It%20costs%20an%20average%20of,'
             'will%20drive%20up%20the%20cost.',
     'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907), 'is_active': True, 'prime_material_cost': None,
-     'material_cost': 20.0, 'labour_cost': 46.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0,
+     'material_cost': 20.0, 'labour_cost': 15.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0,
     'notes': 'We estimate the unit economics from the checkatrade article. We assume that the average job consists '
              'of installing 6 lights based on the hamuch article. We use the median value of 400 for a job of 6 '
-              'lights'}
+              'lights'},
+    {'id': 1235, 'type': 'windows_glazing',
+     'description': 'uPVC windows; Profile 22 or other equal and approved; reinforced where appropriate with '
+                    'aluminium alloy; in refurbishment work, including standard ironmongery; sills and factory glazed '
+                    'with low-e 24 mm double glazing; removing existing windows and fixing new in position; including '
+                    'lugs plugged and screwed to brickwork or blockwork; Casement/fixed light; including vents; '
+                    'e.p.d.m. glazing gaskets and weather seals; 1770 mm × 1200 mm; ref P312WW',
+     'depth': 0.0, 'depth_unit': None, 'cost': None, 'cost_unit': 'gbp_per_unit', 'r_value_per_mm': None,
+     'r_value_unit': 'square_meter_kelvin_per_watt', 'thermal_conductivity': None, 'thermal_conductivity_unit': None,
+     'link': 'SPONs',
+     'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907),
+     'is_active': True, 'prime_material_cost': 176.55,
+     'material_cost': 182.25, 'labour_cost': 163.36, 'labour_hours_per_unit': 6.5, 'plant_cost': 0.0,
+     'total_cost': 345.61,
+     'notes': 'This is the cost of removal of existing windows and installation of new windows. This is a casement '
+              'style window, which is the most common but also the cheapest style. In the cost estimation framework, '
+              'we can inflate prices for different finishes, to be conservative on price.'}
 ]
--- a/recommendations/tests/test_fireplace_recommendations.py
+++ b/recommendations/tests/test_fireplace_recommendations.py
@ -6,7 +6,7 @@ from recommendations.FireplaceRecommendations import FireplaceRecommendations
 class TestFirepaceRecommendations:

    def test_no_fireplaces(self):
-        property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance = Property(id=0, address="fake", postcode="fake")
        property_instance.data = {
            "number-open-fireplaces": 0
        }
@ -22,7 +22,7 @@ class TestFirepaceRecommendations:
        assert recommender.recommendation is None

    def test_one_fireplace(self):
-        property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance = Property(id=0, address="fake", postcode="fake")
        property_instance.data = {
            "number-open-fireplaces": 1
        }
@ -40,7 +40,7 @@ class TestFirepaceRecommendations:
        assert recommender.recommendation[0]["total"] == 300

    def test_multiple_fireplaces(self):
-        property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance = Property(id=0, address="fake", postcode="fake")
        property_instance.data = {
            "number-open-fireplaces": 3
        }
--- a/recommendations/tests/test_floor_recommendations.py
+++ b/recommendations/tests/test_floor_recommendations.py
@ -21,16 +21,6 @@ class TestFloorRecommendations:
        ) as f:
            return pickle.load(f)

-    @pytest.fixture
-    def mock_floor_rec_instance(self):
-        # Creating a mock instance of WallRecommendations with the necessary attributes
-        property_mock = Mock()
-        property_mock.full_sap_epc = {"lodgement-date": "2000-01-01"}
-        property_mock.data = {"county": "York"}
-
-        mock_wall_rec_instance = FloorRecommendations(property_mock, materials)
-        return mock_wall_rec_instance
-
    def test_init(self, input_properties):
        input_properties[0].insulation_floor_area = 50
        input_properties[0].insulation_wall_area = 90
@ -68,6 +58,7 @@ class TestFloorRecommendations:
        input_properties[2].wall_type = "solid brick"
        input_properties[2].floor_type = "suspended"
        input_properties[2].number_of_floors = 1
+        input_properties[2].floor_level = 0

        recommender = FloorRecommendations(property_instance=input_properties[2], materials=materials)
        assert recommender.estimated_u_value is None
@ -93,6 +84,8 @@ class TestFloorRecommendations:
        input_properties[3].insulation_floor_area = 100
        input_properties[3].insulation_wall_area = 100
        input_properties[3].number_of_floors = 1
+        input_properties[3].floor_level = 0
+
        recommender = FloorRecommendations(property_instance=input_properties[3], materials=materials)
        assert recommender.estimated_u_value is None
        recommender.recommend()
@ -114,6 +107,7 @@ class TestFloorRecommendations:
        input_properties[4].wall_type = "solid brick"
        input_properties[4].floor_type = "solid"
        input_properties[4].number_of_floors = 1
+        input_properties[4].floor_level = 0

        # In this case, we have no county, so in this case, it should yse the local-authority-label if possible
        input_properties[4].data["county"] = ""
--- a/recommendations/tests/test_lighting_recommendations.py
+++ b/recommendations/tests/test_lighting_recommendations.py
@ -9,7 +9,7 @@ from recommendations.tests.test_data.materials import materials
 class TestLightingRecommendations:

    def test_init_invalid_materials(self):
-        input_property0 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property0 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property0.lighting = {"low_energy_proportion": 0}
        input_property0.data = {"county": "Greater London Authority"}
        # Test for invalid materials
@ -18,7 +18,7 @@ class TestLightingRecommendations:

    def test_recommend_no_action_needed(self):
        # Case where no recommendation is needed
-        input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property1.lighting = {"low_energy_proportion": 100}
        input_property1.data = {"county": "Greater London Authority"}

@ -28,7 +28,7 @@ class TestLightingRecommendations:

    def test_recommend_action_needed(self):
        # Case where recommendation is needed
-        input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property1.lighting = {"low_energy_proportion": 100}
        input_property1.data = {"county": "Greater London Authority"}
        input_property1.lighting = {"low_energy_proportion": 0.80}
@ -40,8 +40,7 @@ class TestLightingRecommendations:

        assert lr.recommendation == [
            {'parts': [], 'type': 'low_energy_lighting', 'description': 'Install low energy lighting in 4 outlets',
-             'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 458.976, 'subtotal': 382.48,
-             'vat': 76.49600000000001, 'contingency': 27.320000000000007, 'preliminaries': 27.320000000000007,
-             'material': 80.0, 'profit': 54.640000000000015, 'labour_hours': 3.2, 'labour_days': 0.4,
-             'labour_cost': 193.20000000000002}
+             'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 240.24,
+             'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3, 'preliminaries': 14.3,
+             'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0}
        ]
--- a/recommendations/tests/test_recommendation_utils.py
+++ b/recommendations/tests/test_recommendation_utils.py
@ -427,3 +427,106 @@ def test_external_wall_area():
    for num_floors, floor_height, perimeter, built_form, expected in test_cases:
        result = recommendation_utils.estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
        assert result == expected, f"Test failed for {built_form}: Expected {expected}, got {result}"
+
+
+def test_estimate_windows():
+    """Check estimate_windows against the window counts noted for a set of reference properties."""
+    # Based on data from an EPR that has 4 windows
+    windows_case_1 = recommendation_utils.estimate_windows(
+        property_type="Flat",
+        built_form="Semi-Detached",
+        construction_age_band="England and Wales: 1976-1982",
+        floor_area=37,
+        number_habitable_rooms=2,
+        extension_count=0,
+    )
+
+    assert windows_case_1 == 4, f"Expected 4 windows, got {windows_case_1}"
+
+    # Based on data from an EPR that has 7 windows, however two of the windows were very small, having areas of
+    # 0.21m^2 and 0.3m^2 respectively. We see 6 as a reasonable estimate for the number of windows
+    windows_case_2 = recommendation_utils.estimate_windows(
+        property_type="House",
+        built_form="Mid-Terrace",
+        construction_age_band="England and Wales: 1950-1966",
+        floor_area=69,
+        number_habitable_rooms=4,
+        extension_count=0,
+    )
+
+    assert windows_case_2 == 6, f"Expected 6 windows, got {windows_case_2}"
+
+    # Based on data from an EPR on a bungalow, that has 6 windows. Two of the windows are small, both have a 0.4m^2 area
+    # and so 5 windows is an acceptable estimate
+    windows_case_3 = recommendation_utils.estimate_windows(
+        property_type="Bungalow",
+        built_form="Mid-Terrace",
+        construction_age_band="England and Wales: 1967-1975",
+        floor_area=56,
+        number_habitable_rooms=3,
+        extension_count=0,
+    )
+
+    assert windows_case_3 == 5, f"Expected 5 windows, got {windows_case_3}"
+
+    # Based on data from an EPR on an end terrace house that has 8 windows. One of the windows is very small, with an
+    # area of 0.25 m^2 and so 7 windows is an acceptable estimate
+    windows_case_4 = recommendation_utils.estimate_windows(
+        property_type="House",
+        built_form="End-Terrace",
+        construction_age_band="England and Wales: 1967-1975",
+        floor_area=77.28,
+        number_habitable_rooms=4,
+        extension_count=0,
+    )
+
+    assert windows_case_4 == 7, f"Expected 7 windows, got {windows_case_4}"
+
+    # Based on an EPR for a semi-detached house with 11 windows per its condition report; we currently estimate 12
+    windows_case_5 = recommendation_utils.estimate_windows(
+        property_type="House",
+        built_form="Semi-Detached",
+        construction_age_band="England and Wales: 1950-1966",
+        floor_area=88.4,
+        number_habitable_rooms=5,
+        extension_count=0,
+    )
+
+    assert windows_case_5 == 12, f"Expected 12 windows, got {windows_case_5}"
+
+    # Based on Khalim's flat which has 3 windows. There is no construction age band on the EPC. The windows are large
+    # so an estimate of 5 windows is a reasonable estimate
+    windows_case_6 = recommendation_utils.estimate_windows(
+        property_type="Flat",
+        built_form="",
+        construction_age_band="",
+        floor_area=100,
+        number_habitable_rooms=3,
+        extension_count=0,
+    )
+
+    assert windows_case_6 == 5, f"Expected 5 windows, got {windows_case_6}"
+
+    # Based on an EPR semi-detached house though we don't have the exact number of windows. We estimate 10
+    windows_case_7 = recommendation_utils.estimate_windows(
+        property_type="House",
+        built_form="Semi-Detached",
+        construction_age_band="England and Wales: 1967-1975",
+        floor_area=85,
+        number_habitable_rooms=4,
+        extension_count=0,
+    )
+
+    assert windows_case_7 == 10, f"Expected 10 windows, got {windows_case_7}"
+
+    # Based on Khalim's parents' flat
+    windows_case_8 = recommendation_utils.estimate_windows(
+        property_type="Flat",
+        built_form="End-Terrace",
+        construction_age_band="",
+        floor_area=50,
+        number_habitable_rooms=3,
+        extension_count=0,
+    )
+
+    assert windows_case_8 == 5, f"Expected 5 windows, got {windows_case_8}"
--- a/recommendations/tests/test_roof_recommendations.py
+++ b/recommendations/tests/test_roof_recommendations.py
@ -1,5 +1,4 @@
 from backend.Property import Property
-from unittest.mock import Mock
 from recommendations.RoofRecommendations import RoofRecommendations
 from recommendations.tests.test_data.materials import materials

@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
 class TestRoofRecommendations:

    def test_loft_insulation_recommendation_no_insulation(self):
-        property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance = Property(id=0, address="fake", postcode="fake")
        property_instance.age_band = "F"
        property_instance.insulation_floor_area = 100
        property_instance.roof = {
@ -32,7 +31,7 @@ class TestRoofRecommendations:
        assert len(roof_recommender.recommendations)

    def test_loft_insulation_recommendation_50mm_insulation(self):
-        property_instance2 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance2 = Property(id=0, address="fake", postcode="fake")
        property_instance2.age_band = "F"
        property_instance2.insulation_floor_area = 100
        property_instance2.roof = {
@ -54,11 +53,11 @@ class TestRoofRecommendations:

        assert len(roof_recommender2.recommendations) == 1

-        assert roof_recommender2.recommendations[0]["total"] == 1310.56464
+        assert roof_recommender2.recommendations[0]["total"] == 1936.9206000000004
        assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14
        assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68

-        property_instance3 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance3 = Property(id=0, address="fake", postcode="fake")
        property_instance3.age_band = "F"
        property_instance3.insulation_floor_area = 100
        property_instance3.roof = {
@ -83,7 +82,7 @@ class TestRoofRecommendations:
        assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 270

    def test_loft_insulation_recommendation_150mm_insulation(self):
-        property_instance4 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance4 = Property(id=0, address="fake", postcode="fake")
        property_instance4.age_band = "F"
        property_instance4.insulation_floor_area = 100
        property_instance4.roof = {
@ -105,12 +104,12 @@ class TestRoofRecommendations:

        assert len(roof_recommender4.recommendations) == 4

-        assert roof_recommender4.recommendations[0]["total"] == 788.0544
+        assert roof_recommender4.recommendations[0]["total"] == 1128.744
        assert roof_recommender4.recommendations[0]["new_u_value"] == 0.15
        assert roof_recommender4.recommendations[0]["starting_u_value"] == 0.3
        assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 150

-        property_instance5 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance5 = Property(id=0, address="fake", postcode="fake")
        property_instance5.age_band = "F"
        property_instance5.insulation_floor_area = 100
        property_instance5.roof = {
@ -137,7 +136,7 @@ class TestRoofRecommendations:

    def test_loft_insulation_recommendation_270mm_insulation(self):
        # We shouldn't recommend anything in this case
-        property_instance6 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance6 = Property(id=0, address="fake", postcode="fake")
        property_instance6.age_band = "F"
        property_instance6.insulation_floor_area = 100
        property_instance6.roof = {
@ -278,7 +277,7 @@ class TestRoofRecommendations:
    #            "Insulate your room roof with 270mm of Example room roof insulation"

    def test_flat_no_insulation(self):
-        property_instance11 = Property(id=11, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance11 = Property(id=11, address="fake", postcode="fake")
        property_instance11.age_band = "D"
        property_instance11.insulation_floor_area = 33.5
        property_instance11.perimeter = 24
@ -307,7 +306,7 @@ class TestRoofRecommendations:
               "Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"

    def test_flat_insulated(self):
-        property_instance12 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance12 = Property(id=12, address="fake", postcode="fake")
        property_instance12.age_band = "D"
        property_instance12.insulation_floor_area = 40
        property_instance12.perimeter = 30
@ -331,7 +330,7 @@ class TestRoofRecommendations:
        assert not roof_recommender12.recommendations

    def test_flat_limited_insulation(self):
-        property_instance13 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance13 = Property(id=12, address="fake", postcode="fake")
        property_instance13.age_band = "D"
        property_instance13.insulation_floor_area = 40
        property_instance13.perimeter = 40
@ -363,7 +362,7 @@ class TestRoofRecommendations:
               "Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"

    def test_property_above(self):
-        property_instance14 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance14 = Property(id=0, address="fake", postcode="fake")
        property_instance14.age_band = "F"
        property_instance14.insulation_floor_area = 100
        property_instance14.roof = {
--- a/recommendations/tests/test_solar_pv_recommendations.py
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@ -0,0 +1,79 @@
+import pytest
+from recommendations.SolarPvRecommendations import SolarPvRecommendations
+from backend.Property import Property
+
+
+class TestSolarPvRecommendations:
+    """Checks for when SolarPvRecommendations does and does not produce a recommendation."""
+
+    @pytest.fixture
+    def property_instance_invalid_type(self):
+        """Property with an invalid property type."""
+        prop = Property(id=1, address="", postcode="")
+        prop.data = {"property-type": "InvalidType", "county": "Broxbourne", "photo-supply": None}
+        prop.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False}
+        return prop
+
+    @pytest.fixture
+    def property_instance_invalid_roof(self):
+        """House whose roof is flagged as neither flat, pitched nor a roof room."""
+        prop = Property(id=1, address="", postcode="")
+        prop.data = {"county": "Huntingdonshire", "property-type": "House", "photo-supply": None}
+        prop.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False}
+        return prop
+
+    @pytest.fixture
+    def property_instance_has_solar_pv(self):
+        """Flat-roofed house that already has a photo-supply value, i.e. existing solar PV."""
+        prop = Property(id=1, address="", postcode="")
+        prop.data = {"photo-supply": "40", "county": "Huntingdonshire", "property-type": "House"}
+        prop.roof = {"is_flat": True}
+        return prop
+
+    @pytest.fixture
+    def property_instance_valid_all(self):
+        """Flat-roofed house with no photo-supply that passes all conditions."""
+        prop = Property(id=1, address="", postcode="")
+        prop.solar_pv_roof_area = 20
+        prop.solar_pv_percentage = 40
+        prop.data = {"property-type": "House", "photo-supply": None, "county": "Huntingdonshire"}
+        prop.roof = {"is_flat": True}
+        return prop
+
+    def test_invalid_property_type(self, property_instance_invalid_type):
+        """No recommendation is made for an invalid property type."""
+        recommender = SolarPvRecommendations(property_instance_invalid_type)
+        recommender.recommend()
+        assert not recommender.recommendation
+
+    def test_invalid_roof_type(self, property_instance_invalid_roof):
+        """No recommendation is made for an invalid roof type."""
+        recommender = SolarPvRecommendations(property_instance_invalid_roof)
+        recommender.recommend()
+        assert not recommender.recommendation
+
+    def test_existing_solar_pv(self, property_instance_has_solar_pv):
+        """No recommendation is made when the property already has solar PV."""
+        recommender = SolarPvRecommendations(property_instance_has_solar_pv)
+        recommender.recommend()
+        assert not recommender.recommendation
+
+    def test_valid_all_conditions(self, property_instance_valid_all):
+        """A property passing all conditions gets a single 4 kWp solar PV recommendation."""
+        expected_recommendation = {
+            'parts': [],
+            'type': 'solar_pv',
+            'description': 'Install a 4 kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof',
+            'starting_u_value': None, 'new_u_value': None, 'sap_points': None,
+            'total': 8527.0752,
+            'subtotal': 7105.896,
+            'vat': 1421.1791999999996,
+            'labour_hours': 72,
+            'labour_days': 2,
+            'photo_supply': 4000
+        }
+
+        recommender = SolarPvRecommendations(property_instance_valid_all)
+        recommender.recommend()
+
+        assert recommender.recommendation == [expected_recommendation]
--- a/recommendations/tests/test_ventilation_recommendations.py
+++ b/recommendations/tests/test_ventilation_recommendations.py
@ -1,5 +1,4 @@
 from backend.Property import Property
-from unittest.mock import Mock
 from recommendations.VentilationRecommendations import VentilationRecommendations
 from recommendations.tests.test_data.materials import materials

@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
 class TestVentilationRecommendations:

    def test_natural_ventilation(self):
-        input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property1.data = {"mechanical-ventilation": "natural"}

        recommender = VentilationRecommendations(
@ -28,7 +27,7 @@ class TestVentilationRecommendations:
        assert recommender.recommendation[0]["parts"][0]["quantity"] == 2

    def test_missing_ventilation(self):
-        input_property2 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property2 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property2.data = {"mechanical-ventilation": None}

        recommender2 = VentilationRecommendations(
@ -49,7 +48,7 @@ class TestVentilationRecommendations:
        assert recommender2.recommendation[0]["parts"][0]["quantity"] == 2

    def test_nodata_ventilation(self):
-        input_property3 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property3 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property3.data = {"mechanical-ventilation": "NO DATA!!"}

        recommender3 = VentilationRecommendations(
@ -70,7 +69,7 @@ class TestVentilationRecommendations:
        assert recommender3.recommendation[0]["parts"][0]["quantity"] == 2

    def test_existing_ventilation_1(self):
-        input_property4 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property4 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property4.data = {"mechanical-ventilation": 'mechanical, extract only'}

        recommender4 = VentilationRecommendations(
@ -86,7 +85,7 @@ class TestVentilationRecommendations:
        assert recommender4.has_ventilaion

    def test_existing_ventilation_2(self):
-        input_property5 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property5 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property5.data = {"mechanical-ventilation": 'mechanical, supply and extract'}

        recommender5 = VentilationRecommendations(
--- a/recommendations/tests/test_wall_recommendations.py
+++ b/recommendations/tests/test_wall_recommendations.py
@ -231,7 +231,7 @@ class TestWallRecommendationsBase:
 class TestCavityWallRecommensations:

    def test_fill_empty_cavity(self):
-        input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock())
+        input_property = Property(id=1, postcode="F4k3", address="123 fake street")
        input_property.walls = {
            'original_description': 'Cavity wall, as built, no insulation (assumed)',
            'clean_description': 'Cavity wall, as built, no insulation',
@ -265,7 +265,7 @@ class TestCavityWallRecommensations:
        assert np.isclose(recommender.recommendations[1]["total"], 2004.6600000000003)

    def test_fill_partial_filled_cavity(self):
-        input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock())
+        input_property = Property(id=1, postcode="F4k3", address="123 fake street")
        input_property.walls = {
            'original_description': 'Cavity wall, as built, partial insulation (assumed)',
            'clean_description': 'Cavity wall, as built, partial insulation',
@ -299,7 +299,7 @@ class TestCavityWallRecommensations:
        assert np.isclose(recommender.recommendations[1]["total"], 1999.9350000000002)

    def test_system_built_wall(self):
-        input_property2 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+        input_property2 = Property(id=1, postcode="F4k3 2", address="223 fake street")
        input_property2.walls = {
            'original_description': 'System built, as built, no insulation (assumed)',
            'clean_description': 'System built, as built, no insulation',
@ -331,22 +331,22 @@ class TestCavityWallRecommensations:
        assert len(recommender2.recommendations) == 9
        assert recommender2.estimated_u_value == 1
        assert np.isclose(recommender2.recommendations[0]["new_u_value"], 0.19)
-        assert np.isclose(recommender2.recommendations[0]["total"], 15899.9616)
+        assert np.isclose(recommender2.recommendations[0]["total"], 16429.960320000002)
        assert recommender2.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender2.recommendations[0]["parts"][0]["depth"] == 100

        assert np.isclose(recommender2.recommendations[8]["new_u_value"], 0.23)
-        assert np.isclose(recommender2.recommendations[8]["total"], 10916.3424)
+        assert np.isclose(recommender2.recommendations[8]["total"], 11292.768)
        assert recommender2.recommendations[8]["parts"][0]["type"] == "internal_wall_insulation"
        assert recommender2.recommendations[8]["parts"][0]["depth"] == 72.5

        assert np.isclose(recommender2.recommendations[6]["new_u_value"], 0.29)
-        assert np.isclose(recommender2.recommendations[6]["total"], 10621.934399999998)
+        assert np.isclose(recommender2.recommendations[6]["total"], 10988.208)
        assert recommender2.recommendations[6]["parts"][0]["type"] == "internal_wall_insulation"
        assert recommender2.recommendations[6]["parts"][0]["depth"] == 52.5

    def test_timber_frame_wall(self):
-        input_property3 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+        input_property3 = Property(id=1, postcode="F4k3 2", address="223 fake street")
        input_property3.walls = {
            'original_description': 'Timber frame, as built, no insulation (assumed)',
            'clean_description': 'Timber frame, as built, no insulation',
@ -378,17 +378,17 @@ class TestCavityWallRecommensations:
        assert len(recommender3.recommendations) == 6
        assert recommender3.estimated_u_value == 1.9
        assert np.isclose(recommender3.recommendations[0]["new_u_value"], 0.2)
-        assert np.isclose(recommender3.recommendations[0]["total"], 13117.46832)
+        assert np.isclose(recommender3.recommendations[0]["total"], 13554.717263999999)
        assert recommender3.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender3.recommendations[0]["parts"][0]["depth"] == 100.0

        assert np.isclose(recommender3.recommendations[1]["new_u_value"], 0.23)
-        assert np.isclose(recommender3.recommendations[1]["total"], 34070.50944)
+        assert np.isclose(recommender3.recommendations[1]["total"], 35206.19308800001)
        assert recommender3.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender3.recommendations[1]["parts"][0]["depth"] == 150.0

    def test_granite_or_whinstone_wall(self):
-        input_property4 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+        input_property4 = Property(id=1, postcode="F4k3 2", address="223 fake street")
        input_property4.walls = {
            'original_description': 'Granite or whinstone, as built, no insulation (assumed)',
            'clean_description': 'Granite or whinstone, as built, no insulation',
@ -420,17 +420,17 @@ class TestCavityWallRecommensations:
        assert len(recommender4.recommendations) == 6
        assert recommender4.estimated_u_value == 2.3
        assert np.isclose(recommender4.recommendations[0]["new_u_value"], 0.21)
-        assert np.isclose(recommender4.recommendations[0]["total"], 28562.514352)
+        assert np.isclose(recommender4.recommendations[0]["total"], 29547.42864)
        assert recommender4.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender4.recommendations[0]["parts"][0]["depth"] == 100

        assert np.isclose(recommender4.recommendations[1]["new_u_value"], 0.23)
-        assert np.isclose(recommender4.recommendations[1]["total"], 74186.52678400002)
+        assert np.isclose(recommender4.recommendations[1]["total"], 76744.68288000001)
        assert recommender4.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender4.recommendations[1]["parts"][0]["depth"] == 150

    def test_cob_wall(self):
-        input_property5 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+        input_property5 = Property(id=1, postcode="F4k3 2", address="223 fake street")
        input_property5.walls = {
            'original_description': 'Cob, as built',
            'clean_description': 'Cob, as built',
@ -462,17 +462,17 @@ class TestCavityWallRecommensations:
        assert len(recommender5.recommendations) == 5
        assert recommender5.estimated_u_value == 0.8
        assert np.isclose(recommender5.recommendations[0]["new_u_value"], 0.29)
-        assert np.isclose(recommender5.recommendations[0]["total"], 8665.040384000002)
+        assert np.isclose(recommender5.recommendations[0]["total"], 8963.834880000002)
        assert recommender5.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender5.recommendations[0]["parts"][0]["depth"] == 50

        assert np.isclose(recommender5.recommendations[3]["new_u_value"], 0.26)
-        assert np.isclose(recommender5.recommendations[3]["total"], 20078.742992)
+        assert np.isclose(recommender5.recommendations[3]["total"], 20771.11344)
        assert recommender5.recommendations[3]["parts"][0]["type"] == "internal_wall_insulation"
        assert recommender5.recommendations[3]["parts"][0]["depth"] == 100

    def test_sandstone_or_limestone_wall(self):
-        input_property6 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property6 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property6.walls = {
            'original_description': 'Sandstone or limestone, as built, no insulation (assumed)',
            'clean_description': 'Sandstone or limestone, as built, no insulation',
@ -504,16 +504,16 @@ class TestCavityWallRecommensations:
        assert len(recommender6.recommendations) == 9
        assert recommender6.estimated_u_value == 1
        assert np.isclose(recommender6.recommendations[0]["new_u_value"], 0.19)
-        assert np.isclose(recommender6.recommendations[0]["total"], 44829.0584)
+        assert np.isclose(recommender6.recommendations[0]["total"], 46374.888000000006)
        assert recommender6.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender6.recommendations[0]["parts"][0]["depth"] == 100

        assert np.isclose(recommender6.recommendations[2]["new_u_value"], 0.21)
-        assert np.isclose(recommender6.recommendations[2]["total"], 116436.25280000002)
+        assert np.isclose(recommender6.recommendations[2]["total"], 120451.29600000002)
        assert recommender6.recommendations[2]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender6.recommendations[2]["parts"][0]["depth"] == 150

        assert np.isclose(recommender6.recommendations[4]["new_u_value"], 0.28)
-        assert np.isclose(recommender6.recommendations[4]["total"], 91267.0136)
+        assert np.isclose(recommender6.recommendations[4]["total"], 94414.15199999999)
        assert recommender6.recommendations[4]["parts"][0]["type"] == "internal_wall_insulation"
        assert recommender6.recommendations[4]["parts"][0]["depth"] == 100
--- a/recommendations/tests/test_window_recommendations.py
+++ b/recommendations/tests/test_window_recommendations.py
@ -0,0 +1,252 @@
+from recommendations.WindowsRecommendations import WindowsRecommendations
+from backend.Property import Property
+from recommendations.tests.test_data.materials import materials
+
+
+class TestWindowRecommendations:
+
+    def test_fully_single_glazed(self):
+        """
+        For this property, we expect all windows to be single glazed and should recommend full double glazing
+        :return:
+        """
+
+        property_1 = Property(
+            id=1,
+            postcode='1',
+            address='1',
+            data={
+                "county": "Wychavon",
+                "multi-glaze-proportion": 0,
+                "uprn": 0
+            }
+        )
+        property_1.windows = {
+            'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': 'full',
+            'glazing_type': 'single',
+            'no_data': False
+        }
+        property_1.number_of_windows = 7
+
+        recommender = WindowsRecommendations(property_instance=property_1, materials=materials)
+
+        assert not recommender.recommendation
+
+        recommender.recommend()
+
+        assert recommender.recommendation == [
+            {'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to all windows',
+             'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 5721.943248,
+             'subtotal': 4768.28604, 'vat': 953.6572080000001, 'contingency': 340.59186, 'preliminaries': 340.59186,
+             'material': 1275.75, 'profit': 681.18372, 'labour_hours': 45.5, 'labour_cost': 994.8624,
+             'labour_days': 2.84375, 'is_secondary_glazing': False}]
+
+    def test_partial_double_glazed(self):
+        """
+        For this property, the double glazing is described as partial, therefore we recommend completion of
+        double glazing
+        :return:
+        """
+
+        property_2 = Property(
+            id=1,
+            postcode='1',
+            address='1',
+            data={
+                "county": "Wychavon",
+                "multi-glaze-proportion": 33,
+                "uprn": 0
+            }
+        )
+        property_2.windows = {'original_description': 'Mostly double glazing', 'has_glazing': True,
+                              'glazing_coverage': 'most',
+                              'glazing_type': 'double', 'no_data': False}
+        property_2.number_of_windows = 7
+
+        recommender2 = WindowsRecommendations(property_instance=property_2, materials=materials)
+
+        assert not recommender2.recommendation
+
+        recommender2.recommend()
+
+        assert recommender2.recommendation == [
+            {'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to the remaining windows',
+             'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 4087.10232,
+             'subtotal': 3405.9186, 'vat': 681.18372, 'contingency': 243.2799, 'preliminaries': 243.2799,
+             'material': 911.25, 'profit': 486.5598, 'labour_hours': 32.5, 'labour_cost': 710.6160000000001,
+             'labour_days': 2.03125, 'is_secondary_glazing': False}]
+
+    def test_fully_double_glazed(self):
+        """
+        This property has full double glazing so we shouldn't recommend anything
+        :return:
+        """
+
+        property_3 = Property(
+            id=1,
+            postcode='1',
+            address='1',
+            data={
+                "county": "Wychavon",
+                "multi-glaze-proportion": 80,
+                "uprn": 0
+            }
+        )
+        property_3.windows = {'original_description': 'Fully double glazed', 'has_glazing': True,
+                              'glazing_coverage': 'full',
+                              'glazing_type': 'double', 'no_data': False}
+        property_3.number_of_windows = 7
+
+        recommender3 = WindowsRecommendations(property_instance=property_3, materials=materials)
+
+        assert not recommender3.recommendation
+
+        recommender3.recommend()
+
+        assert not recommender3.recommendation
+
+    def test_fully_secondary_glazed(self):
+        property_4 = Property(
+            id=1,
+            postcode='1',
+            address='1',
+            data={
+                "county": "Wychavon",
+                "multi-glaze-proportion": 100,
+                "uprn": 0
+            }
+        )
+        property_4.windows = {'original_description': 'Full secondary glazing', 'has_glazing': True,
+                              'glazing_coverage': 'full',
+                              'glazing_type': 'secondary', 'no_data': False}
+        property_4.number_of_windows = 7
+
+        recommender4 = WindowsRecommendations(property_instance=property_4, materials=materials)
+
+        assert not recommender4.recommendation
+
+        recommender4.recommend()
+
+        assert not recommender4.recommendation
+
+    def test_partial_secondary_glazing(self):
+        property_5 = Property(
+            id=1,
+            postcode='1',
+            address='1',
+            data={
+                "county": "Wychavon",
+                "multi-glaze-proportion": 50,
+                "uprn": 0
+            }
+        )
+        property_5.windows = {'original_description': 'Partial secondary glazing', 'has_glazing': True,
+                              'glazing_coverage': 'partial',
+                              'glazing_type': 'secondary', 'no_data': False}
+        property_5.number_of_windows = 7
+
+        recommender5 = WindowsRecommendations(property_instance=property_5, materials=materials)
+
+        assert not recommender5.recommendation
+
+        recommender5.recommend()
+
+        assert recommender5.recommendation == [
+            {'parts': [], 'type': 'windows_glazing',
+             'description': 'Install secondary glazing to the remaining windows',
+             'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 1089.893952,
+             'subtotal': 908.24496, 'vat': 181.64899200000002, 'contingency': 64.87464, 'preliminaries': 64.87464,
+             'material': 729.0, 'profit': 129.74928, 'labour_hours': 13.0, 'labour_cost': 568.4928,
+             'labour_days': 0.8125, 'is_secondary_glazing': True}]
+
+    def test_single_glazed_restricted_measures(self):
+        property_6 = Property(
+            id=1,
+            postcode='1',
+            address='1',
+            data={
+                "county": "Wychavon",
+                "multi-glaze-proportion": 0,
+                "uprn": 0
+            }
+        )
+        property_6.windows = {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
+                              'glazing_type': 'single',
+                              'no_data': False}
+        property_6.number_of_windows = 7
+        property_6.restricted_measures = True
+        property_6.is_heritage = True
+
+        recommender6 = WindowsRecommendations(property_instance=property_6, materials=materials)
+
+        assert not recommender6.recommendation
+
+        recommender6.recommend()
+
+        assert recommender6.recommendation == [
+            {'parts': [], 'type': 'windows_glazing',
+             'description': 'Install secondary glazing to all windows. Secondary '
+                            'glazing recommended due to herigate building status',
+             'starting_u_value': None, 'new_u_value': None, 'sap_points': None,
+             'total': 1907.314416, 'subtotal': 1589.42868, 'vat': 317.885736,
+             'contingency': 113.53062, 'preliminaries': 113.53062,
+             'material': 1275.75, 'profit': 227.06124, 'labour_hours': 22.75,
+             'labour_cost': 994.8624, 'labour_days': 1.421875, 'is_secondary_glazing': True}
+        ]
+
+    def test_full_triple_glazed(self):
+        property_7 = Property(
+            id=1,
+            postcode='1',
+            address='1',
+            data={
+                "county": "Wychavon",
+                "multi-glaze-proportion": 100,
+                "uprn": 0
+            }
+        )
+        property_7.windows = {'original_description': 'Fully triple glazed', 'has_glazing': True,
+                              'glazing_coverage': 'full',
+                              'glazing_type': 'triple', 'no_data': False}
+        property_7.number_of_windows = 7
+
+        recommender7 = WindowsRecommendations(property_instance=property_7, materials=materials)
+
+        assert not recommender7.recommendation
+
+        recommender7.recommend()
+
+        assert not recommender7.recommendation
+
+    def test_partial_triple_glazed(self):
+        """
+        We should just recommend double glazing to the remaining windows, since it's a cheaper option than triple glazing
+        """
+
+        property_8 = Property(
+            id=1,
+            postcode='1',
+            address='1',
+            data={
+                "county": "Wychavon",
+                "multi-glaze-proportion": 80,
+                "uprn": 1
+            }
+        )
+        property_8.windows = {'original_description': 'Mostly triple glazing', 'has_glazing': True,
+                              'glazing_coverage': 'most',
+                              'glazing_type': 'triple', 'no_data': False}
+        property_8.number_of_windows = 7
+
+        recommender8 = WindowsRecommendations(property_instance=property_8, materials=materials)
+
+        assert not recommender8.recommendation
+
+        recommender8.recommend()
+
+        assert recommender8.recommendation == [
+            {'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to the remaining windows',
+             'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 1634.840928,
+             'subtotal': 1362.36744, 'vat': 272.47348800000003, 'contingency': 97.31196, 'preliminaries': 97.31196,
+             'material': 364.5, 'profit': 194.62392, 'labour_hours': 13.0, 'labour_cost': 284.2464,
+             'labour_days': 0.8125, 'is_secondary_glazing': False}]