Merge pull request #380 from Hestia-Homes/main

Major deployment
2026-06-08 11:17:27 +00:00 · 2025-04-14 12:03:40 +01:00 · 2025-04-14 12:03:40 +01:00 · 4b9ebb008f
commit 4b9ebb008f
parent 54ce22fce3 76d8df9f32
101 changed files with 19989 additions and 568 deletions
--- a/.gitignore
+++ b/.gitignore
@ -268,4 +268,11 @@ adhoc
 adhoc/*

 etl-router-venv/
-refactor_datasets/
+refactor_datasets/
+
+etl/eligibility/ha_15_32/
+cache/
+*/.idea
+
+*.png
+*.pptx
--- a/.idea/terraform.xml
+++ b/.idea/terraform.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="TerraformProjectSettings">
+    <option name="toolPath" value="/opt/homebrew/bin/terraform" />
+  </component>
+</project>
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
--- a/asset_list/DataMapper.py
+++ b/asset_list/DataMapper.py
@ -0,0 +1,178 @@
+# OpenAI API key, read from the process environment (None when the variable is unset).
+# Keep the key out of source control: set OPENAI_API_KEY in your environment instead.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+
+class DataRemapper:
+    """
+    Standardize raw categorical values against a fixed set of allowed values.
+
+    Each unique value is resolved, in order, by: the predefined mapping, an exact match of the
+    cleaned value, fuzzy matching, and finally one batched OpenAI request for anything still
+    unmapped. Token usage and an estimated cost are accumulated across AI calls.
+    """
+
+    def __init__(self, standard_values, standard_map=None, max_tokens=1000):
+        """
+        Initialize the remapper with standard values and a predefined mapping.
+
+        :param standard_values: Set of allowed standardized values.
+        :param standard_map: Dictionary of common remappings {raw_value: standard_value}.
+            ``None`` is treated as an empty mapping.
+        :param max_tokens: Maximum prompt size in tokens; the same number is passed to the API
+            as the completion limit.
+        """
+        self.standard_values = standard_values
+        # Membership tests are run against the map, so None must become an empty dict
+        self.standard_map = standard_map if standard_map is not None else {}
+        self.fuzzy_threshold = 90  # Adjust fuzzy matching sensitivity
+        self.ai_model = "gpt-4-turbo"  # Use gpt-3.5-turbo for cheaper processing
+
+        # Tokenizer for counting tokens
+        self.tokenizer = tiktoken.encoding_for_model(self.ai_model)
+
+        # Track token usage and remap dictionary
+        self.total_tokens_used = 0
+        self.total_cost = 0
+        self.remap_dict = {}  # {original_value: standardized_value}
+        self.max_tokens = max_tokens  # Limit for OpenAI API
+
+        # Memoization for AI calls
+        self.ai_cache = {}  # {tuple(unmapped_values): {original_value: standardized_value}}
+        # Capture the raw response text for debugging
+        self.ai_response = None
+
+        # OpenAI pricing (as of Feb 2024)
+        self.pricing = {
+            "gpt-4-turbo": {"input": 0.01 / 1000, "output": 0.03 / 1000},
+            "gpt-3.5-turbo": {"input": 0.0015 / 1000, "output": 0.002 / 1000},
+        }
+
+        self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
+
+    @staticmethod
+    def clean_string(text):
+        """
+        Basic text cleaning: strip, lower-case, remove punctuation and collapse whitespace.
+
+        :param text: Raw value to clean.
+        :return: The cleaned string, or None when ``text`` is not a string.
+        """
+        if not isinstance(text, str):
+            return None
+        text = text.strip().lower()
+        text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
+        # Collapse runs of whitespace into a single space
+        text = re.sub(r'\s+', ' ', text)
+        return text
+
+    def fuzzy_match(self, text):
+        """
+        Use fuzzy matching to find the closest standard value.
+
+        :param text: Cleaned value to match; empty or None yields no match.
+        :return: The best matching standard value when its score reaches ``fuzzy_threshold``, else None.
+        """
+        if not text:
+            return None
+        result = process.extractOne(text, self.standard_values)
+        if not result:
+            # No candidate at all (e.g. no standard values to choose from)
+            return None
+        # Only read the first two fields (match, score): depending on the fuzzy-matching library
+        # and the type of the choices, the result may carry extra trailing fields
+        match, score = result[0], result[1]
+        return match if score >= self.fuzzy_threshold else None
+
+    def count_tokens(self, text):
+        """Estimate the number of tokens in a given text (0 for empty or None)."""
+        return len(self.tokenizer.encode(text)) if text else 0
+
+    @staticmethod
+    def _parse_ai_mapping(output_text, unmapped_values):
+        """
+        Turn the raw model reply into a {original_value: standardized_value} dictionary.
+
+        The reply is parsed as data only and is never executed. JSON is tried first, then a
+        Python-style dictionary literal (e.g. single-quoted keys). If neither yields a
+        dictionary, every unmapped value falls back to "unknown".
+
+        :param output_text: Text returned by the model.
+        :param unmapped_values: Values that were sent for standardization (used for the fallback).
+        :return: Dictionary {original_value: standardized_value}.
+        """
+        import ast
+        import json
+
+        text = output_text.strip()
+        # The prompt forbids markdown, but strip a surrounding code fence if one is returned anyway
+        if text.startswith("```"):
+            text = re.sub(r"^```[a-zA-Z]*\s*|\s*```$", "", text)
+
+        try:
+            mapping = json.loads(text)
+        except (ValueError, RecursionError):
+            try:
+                mapping = ast.literal_eval(text)
+            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
+                mapping = None
+
+        if not isinstance(mapping, dict):
+            return {val: "unknown" for val in unmapped_values}  # Fallback
+        return mapping
+
+    def ai_standardize(self, unmapped_values):
+        """
+        Call OpenAI API **once** for all unmapped values to minimize cost, with memoization.
+
+        :param unmapped_values: List of raw values that no earlier step could map.
+        :return: Dictionary {original_value: standardized_value}; empty when there is nothing to map.
+        :raises ValueError: If the prompt is longer than ``max_tokens`` tokens.
+        """
+        if not unmapped_values:
+            return {}
+
+        # Sort for a stable memoization key; key=str keeps mixed-type inputs sortable
+        unmapped_tuple = tuple(sorted(unmapped_values, key=str))
+        if unmapped_tuple in self.ai_cache:
+            return self.ai_cache[unmapped_tuple]  # Return memoized result
+
+        prompt = f"""
+        You are an expert in data classification. Standardize each of these values into one of the categories: 
+        {list(self.standard_values)}. 
+
+        Return only a JSON dictionary where:
+        - The keys are the original values.
+        - The values are the standardized ones.
+
+        Strictly return JSON **without markdown formatting** or extra text.
+
+        Example Output:
+        {{
+            "BLKHOUS": "block house",
+            "BEDSIT": "bedsit"
+        }}
+
+        Values to standardize:
+        {unmapped_values}
+        """
+
+        # Count input tokens
+        input_tokens = self.count_tokens(prompt)
+        if input_tokens > self.max_tokens:
+            raise ValueError("Input tokens exceed the maximum limit.")
+
+        logger.info("Calling OpenAI API for standardization...")
+        response = self.openai_client.chat.completions.create(
+            model=self.ai_model,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=self.max_tokens,
+            temperature=0.1,
+        )
+
+        output_text = response.choices[0].message.content.strip()
+        output_tokens = self.count_tokens(output_text)  # Count output tokens
+
+        # Track total token usage
+        self.total_tokens_used += input_tokens + output_tokens
+
+        # Estimate cost
+        input_cost = input_tokens * self.pricing[self.ai_model]["input"]
+        output_cost = output_tokens * self.pricing[self.ai_model]["output"]
+        self.total_cost += input_cost + output_cost
+
+        # Parse the reply as data (never eval model output); falls back to "unknown" on bad replies
+        mapping = self._parse_ai_mapping(output_text, unmapped_values)
+
+        # Memoize the AI response
+        self.ai_cache[unmapped_tuple] = mapping
+        # We store the raw AI response for debugging
+        self.ai_response = output_text
+        logger.debug(f"AI Response: {mapping}")
+
+        return mapping
+
+    def standardize_list(self, values_to_remap):
+        """
+        Standardizes a list of values and returns a dictionary {original_value: standardized_value}.
+
+        Results accumulate in ``self.remap_dict`` across calls.
+
+        :param values_to_remap: List of raw values to standardize.
+        :return: Dictionary {original_value: standardized_value}.
+        """
+        unique_values = set(values_to_remap)  # Process only unique values
+
+        unmapped_values = []
+        for value in unique_values:
+            if pd.isna(value):  # Handle NaN values
+                self.remap_dict[value] = "unknown"
+                continue
+
+            cleaned_value = self.clean_string(value)
+
+            # Rule-Based Check (Predefined Mapping): cleaned form first, then the raw value
+            if cleaned_value in self.standard_map:
+                self.remap_dict[value] = self.standard_map[cleaned_value]
+                continue
+
+            if value in self.standard_map:
+                self.remap_dict[value] = self.standard_map[value]
+                continue
+
+            # Only strings can be lower-cased; other types skip this lookup
+            if isinstance(value, str) and value.lower() in self.standard_map:
+                self.remap_dict[value] = self.standard_map[value.lower()]
+                continue
+
+            # Exact Match in Standard Values
+            if cleaned_value in self.standard_values:
+                self.remap_dict[value] = cleaned_value
+                continue
+
+            # Fuzzy Matching
+            fuzzy_match = self.fuzzy_match(cleaned_value)
+            if fuzzy_match:
+                self.remap_dict[value] = fuzzy_match
+                continue
+
+            # Capture anything that wasn't mapped
+            unmapped_values.append(value)
+
+        # AI Model - remap anything unmapped (batch request)
+        ai_mapping = self.ai_standardize(unmapped_values)
+        self.remap_dict.update(ai_mapping)
+
+        return self.remap_dict
+
+    def report_usage(self):
+        """Prints a summary of token usage and cost."""
+        print(f"\n🔹 Total Tokens Used: {self.total_tokens_used}")
+        print(f"💰 Estimated Cost: ${self.total_cost:.4f}")
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -0,0 +1,953 @@
+import os
+import json
+import pandas as pd
+from pprint import pprint
+import msgpack
+from utils.s3 import read_from_s3
+from asset_list.AssetList import AssetList
+from asset_list.mappings.property_type import PROPERTY_MAPPING
+from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
+from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS
+from asset_list.mappings.heating_systems import HEATING_MAPPINGS
+from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS
+from asset_list.mappings.roof import ROOF_CONSTRUCTION_MAPPINGS
+from asset_list.utils import get_data
+
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+
+# Load variables from backend/.env before the EPC token is read below
+load_dotenv(dotenv_path="backend/.env")
+# EPC API token; None when EPC_AUTH_TOKEN is not set in the environment
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
+    """
+    Add an "address1_extracted" column derived from the full address column.
+
+    :param asset_list: DataFrame to update; it is modified in place and also returned.
+    :param full_address_col: Name of the column holding the full address.
+    :param postcode_col: Name of the postcode column (only read by "house_number_extraction").
+    :param method: One of "first_two_words", "first_word" or "house_number_extraction".
+    :return: The same DataFrame with the "address1_extracted" column set.
+    :raises ValueError: If ``method`` is not one of the supported names.
+    """
+    if method not in ("first_two_words", "first_word", "house_number_extraction"):
+        raise ValueError(f"Method {method} not recognized")
+
+    if method == "house_number_extraction":
+        # Row-wise lookup, since the helper needs both the address and the postcode
+        def house_number(row):
+            return SearchEpc.get_house_number(address=row[full_address_col], postcode=row[postcode_col])
+
+        extracted = asset_list.apply(house_number, axis=1)
+    else:
+        # Both remaining methods work on the space-separated tokens of the address
+        tokens = asset_list[full_address_col].str.split(" ")
+        if method == "first_word":
+            extracted = tokens.str[0]
+        else:
+            extracted = tokens.str[:2].str.join(" ")
+
+    asset_list["address1_extracted"] = extracted
+    return asset_list
+
+
+def app():
+    """
+    This app is EPC pulling data for some properties owned by Livewest
+
+    Data request contents:
+    Date of last EPC
+    Reason for EPC
+    SAP score on register
+    Property Type
+    Property Area
+    Property Age
+    Any Dimensions (HLP,PW,RH)
+    Property Wall Construction
+    Heating Type
+    Secondary Heating
+    Loft Insulation Depth
+
+    Additional if possible:
+    Heat loss calculations
+    EPC recommendations
+    Property UPRN
+    """
+
+    # TODO:
+    # For cavity work:
+    # - Flag any entries that have a different wall type between non-intrusive data against EPC
+    # - Worth double checking entries that have a difference in wall construction
+    # - Look at anything that is flagged as an empty cavity but the EPC data says it’s a filled cavity
+    # - Look at the current EPC scores - Anything that is C75 or above, especially if it’s assumed no insulation
+    # - By postcode, we can try and deduce if all of the addresses are a flats and then estimate if 50% of the flats
+    # are less than C75
+    # - Flag anything pre SAP2012
+    # - Flag anything over 5 years old
+    # - Look at year built vs age band
+    #
+    # For Solar:
+    # - Discount any that have solar PV - based on non-intrusives and from the inspections team
+    # - In the heating, discount anything that isn’t ashp, ghsp, hhrs, electric storage - possibly homes with
+    # electric room heaters but it might need to be an EPC E
+    # - Fabric - check the floor, wall and roof:
+    #     - Filled or empty cavity is good
+    #     - Insulated solid/timber/system built is good
+    #     - SCIS/CEG needs solid floors
+    #     - JJC don’t care
+    #     - Anything with a loft 200 or below
+    # - Anything C75 and above won’t qualify
+    # - Insulated loft = 200mm
+    # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
+    # - Or the insulation required is loft/cavity (floors should be solid)
+
+    # Bromford
+    data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
+                   "Rebuild/Prepared data/")
+    data_filename = "asset_list.xlsx"
+    sheet_name = "Sheet1"
+    postcode_column = 'PostCode'
+    fulladdress_column = "FullAddress"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    landlord_year_built = "ConYear"
+    landlord_os_uprn = None
+    landlord_property_type = "AssetTypeDesc"
+    landlord_built_form = "PropTypeDesc"
+    landlord_wall_construction = "Construction type"
+    landlord_roof_construction = None
+    landlord_heating_system = "Heating Type"
+    landlord_existing_pv = None
+    landlord_property_id = "Asset"
+    landlord_sap = None
+    outcomes_filename = "outcomes.xlsx"
+    outcomes_sheetname = "Sheet1"
+    outcomes_postcode = "Postcode"
+    outcomes_houseno = "No"
+    outcomes_id = None
+    outcomes_address = "Address"
+    master_filepaths = [
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
+        "3 submissions.csv",
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
+        "4 submissions.csv",
+    ]
+    master_to_asset_list_filepath = None
+    phase = False
+
+    # Torus
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
+    data_filename = "Torus Property Asset List - Phase 1.xlsx"
+    sheet_name = "TORUS"
+    postcode_column = 'Postcode'
+    fulladdress_column = None
+    address1_column = "AddressLine1"
+    address1_method = None
+    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
+    missing_postcodes_method = None
+    landlord_year_built = "Property Age"
+    landlord_os_uprn = "NatUPRN"
+    landlord_property_type = "Property Type"
+    landlord_built_form = "Built Form"
+    landlord_wall_construction = "Wall Construction"
+    landlord_roof_construction = "Roof Construction"
+    landlord_heating_system = "Space Heating Source"
+    landlord_existing_pv = "Low Carbon Technology (Solar PV)"
+    landlord_property_id = "UPRN"
+    landlord_sap = "SAP Score"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    outcomes_address = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+    phase = True
+
+    # Ealing - houses
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing"
+    data_filename = "Ealing_rechecked_cleaned_05042025.csv"
+    sheet_name = None
+    postcode_column = 'Postcode'
+    fulladdress_column = "Address"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    landlord_year_built = "Year Built"
+    landlord_os_uprn = None
+    landlord_property_type = "Property Type Code"
+    landlord_built_form = None
+    landlord_wall_construction = None
+    landlord_heating_system = None
+    landlord_existing_pv = None
+    landlord_property_id = "Property ref"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    outcomes_address = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+
+    # Southern Midlands
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
+    data_filename = "Southern Housing Midlands Property List - combined.xlsx"
+    sheet_name = "Sheet 1"
+    postcode_column = 'Post Code'
+    fulladdress_column = "Address"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    landlord_year_built = "Age_1"
+    landlord_os_uprn = None
+    landlord_property_type = "Prop_Type"
+    landlord_built_form = "Prop_Type"
+    landlord_wall_construction = "Walls_P"
+    landlord_heating_system = "Heating System"
+    landlord_existing_pv = None
+    landlord_property_id = "AssetID"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    outcomes_address = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+
+    # Live West (2018 Asset list)
+    data_folder = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset List"
+    )
+    data_filename = "LIVEWEST  STOCK - 23rd October 2018.xlsx"
+    sheet_name = "Assets"
+    postcode_column = 'Postcode'
+    fulladdress_column = "Address"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    landlord_year_built = "Build Year"
+    landlord_os_uprn = None
+    landlord_property_type = "Property Archetype"
+    landlord_built_form = None
+    landlord_wall_construction = None
+    landlord_heating_system = "Heating Fuel Type"
+    landlord_existing_pv = None
+    landlord_property_id = "Uprn - DO NOT DELETE"
+    outcomes_filename = "RT - LiveWest.xlsx"
+    outcomes_sheetname = "Feedback"
+    outcomes_postcode = "Poscode"
+    outcomes_houseno = "No."
+    outcomes_id = "UPRN"
+    master_filepaths = [
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master "
+        "- redacted for analysis/CAVITY-Table 1.csv"
+    ]
+    master_to_asset_list_filepath = None
+
+    # Live West (South West asset list)
+    data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
+                   "2025/Livewest Asset List (Original) - csv")
+    data_filename = "Report-Table 1.csv"
+    sheet_name = None
+    postcode_column = 'Postcode'
+    fulladdress_column = "T1_Address"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    landlord_year_built = "Build Yr"
+    landlord_os_uprn = None
+    landlord_property_type = "T1_AssetType"
+    landlord_built_form = "T1_AssetType"
+    landlord_wall_construction = "Wall Type Cavity"
+    landlord_heating_system = "Heating Fuel"
+    landlord_existing_pv = None
+    landlord_property_id = "T1_UPRN"
+    outcomes_filename = "RT - LiveWest.xlsx"
+    outcomes_sheetname = "Feedback"
+    outcomes_postcode = "Poscode"
+    outcomes_houseno = "No."
+    outcomes_id = "UPRN"
+    master_filepaths = [
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master "
+        "- redacted for analysis/CAVITY-Table 1.csv"
+    ]
+    master_to_asset_list_filepath = None
+
+    # PFP London
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London"
+    data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx"
+    sheet_name = "PFP SURROUNDING LONDON"
+    postcode_column = 'Postcode'
+    fulladdress_column = None
+    address1_column = "AddressLine1"
+    address1_method = None
+    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
+    missing_postcodes_method = None
+    landlord_year_built = None
+    landlord_os_uprn = None
+    landlord_property_type = "Archetype (PFP)"
+    landlord_built_form = "Archetype (PFP)"
+    landlord_wall_construction = None
+    landlord_heating_system = None
+    landlord_existing_pv = None
+    landlord_property_id = "Uprn"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+
+    # PFP North-West
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
+    data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
+    sheet_name = "CHECKED"
+    postcode_column = 'Postcode'
+    fulladdress_column = None
+    address1_column = "AddressLine1"
+    address1_method = None
+    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
+    missing_postcodes_method = None
+    landlord_year_built = None
+    landlord_os_uprn = None
+    landlord_property_type = "Archetype (PFP)"
+    landlord_built_form = "Archetype (PFP)"
+    landlord_wall_construction = None
+    landlord_heating_system = None
+    landlord_existing_pv = None
+    landlord_property_id = "Uprn"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+
+    # PFP North-East
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East"
+    data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx"
+    sheet_name = "CHECKED"
+    postcode_column = 'Postcode'
+    fulladdress_column = None
+    address1_column = "AddressLine1"
+    address1_method = None
+    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
+    missing_postcodes_method = None
+    landlord_year_built = None
+    landlord_os_uprn = None
+    landlord_property_type = "Archetype (PFP)"
+    landlord_built_form = "Archetype (PFP)"
+    landlord_wall_construction = None
+    landlord_heating_system = None
+    landlord_existing_pv = None
+    landlord_property_id = "Uprn"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+
+    # PFP East
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East"
+    data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx"
+    sheet_name = "PFP EAST"
+    postcode_column = 'Postcode'
+    fulladdress_column = None
+    address1_column = "AddressLine1"
+    address1_method = None
+    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
+    missing_postcodes_method = None
+    landlord_year_built = None
+    landlord_os_uprn = None
+    landlord_property_type = "Archetype (PFP)"
+    landlord_built_form = "Archetype (PFP)"
+    landlord_wall_construction = None
+    landlord_heating_system = None
+    landlord_existing_pv = None
+    landlord_property_id = "Uprn"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+
+    # Wates
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - "
+    data_filename = "ECO 4 Wates.xlsx"
+    sheet_name = "Roadmap Homes"
+    postcode_column = 'Postcode'
+    fulladdress_column = None
+    address1_column = "Address Line 1"
+    address1_method = None
+    address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
+    missing_postcodes_method = None
+    landlord_year_built = "Build Year"
+    landlord_os_uprn = None
+    landlord_property_type = "Archetype"
+    landlord_built_form = "Archetype"
+    landlord_wall_construction = "Wall"
+    landlord_heating_system = "Heating Type"
+    landlord_existing_pv = None
+    landlord_property_id = "UPRN"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+
+    # Ealing
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
+    # data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
+    # sheet_name = "IGNORE - FULL MAIN"
+    # postcode_column = 'Postcode'
+    # fulladdress_column = "Address"
+    # address1_column = None
+    # address1_method = "first_word"
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "Year Built"
+    # landlord_os_uprn = None
+    # landlord_property_type = "Property Type Code"
+    # landlord_wall_construction = None
+    # landlord_heating_system = None
+    # landlord_existing_pv = None
+    # landlord_property_id = "Property ref"
+
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
+    # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
+    # sheet_name = "Sheet1"
+    # postcode_column = 'Full Address.1'
+    # fulladdress_column = "Full Address"
+    # address1_column = None
+    # address1_method = "first_word"
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "Build Date"
+    # landlord_os_uprn = None
+    # landlord_property_type = "Property Type"
+    # landlord_wall_construction = "Wallinsul"
+    # landlord_heating_system = "HeatSorc"
+    # landlord_existing_pv = None
+    # landlord_property_id = "Property Reference"
+    # outcomes_filename = None
+    # outcomes_sheetname = None
+    # outcomes_postcode = None
+    # outcomes_houseno = None
+    # master_filepaths = []
+    # master_to_asset_list_filepath = None
+
+    # For Westward
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
+    data_filename = "WESTWARD - completed list - 20.03.2025.xlsx"
+    sheet_name = "Sheet1"
+    postcode_column = "WFT EDIT Postcode"
+    fulladdress_column = "Address"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    landlord_year_built = "Build date"
+    landlord_os_uprn = "UPRN"
+    landlord_property_type = "Location type"
+    landlord_built_form = None
+    landlord_wall_construction = "Wall Construction (EPC)"
+    landlord_heating_system = "Heat Source"
+    landlord_existing_pv = "PV (Y/N)"
+    landlord_property_id = "Place ref"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    master_filepaths = []
+    master_to_asset_list_filepath = None
+    outcomes_id = None
+
+    # For ACIS - programme re-build
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
+    # data_filename = "ACIS asset list.xlsx"
+    # sheet_name = "Assets"
+    # address1_column = "House No"
+    # postcode_column = "Postcode"
+    # landlord_property_id = "UPRN"
+    # fulladdress_column = None
+    # address_cols_to_concat = ["House No", "Street", "Town"]
+    # missing_postcodes_method = None
+    # address1_method = None
+    # landlord_year_built = "YEAR BUILT"
+    # landlord_os_uprn = None
+    # landlord_property_type = "Property type"
+    # landlord_built_form = None
+    # landlord_wall_construction = "Wall Constuction"
+    # landlord_heating_system = "Heating"
+    # landlord_existing_pv = None
+    # outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
+    # outcomes_sheetname = "Feedback"
+    # outcomes_postcode = "Postcode"
+    # outcomes_houseno = "No"
+    # master_filepaths = [
+    #     os.path.join(data_folder, "ECO 3 -Table 1.csv"),
+    #     os.path.join(data_folder, "ECO 4 -Table 1.csv"),
+    # ]
+    # master_to_asset_list_filepath = None
+
+    # For plus dane
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
+    data_filename = "PLUS DANE Asset List - for analysis.xlsx"
+    sheet_name = "Asset List"
+    address1_column = " Address"
+    postcode_column = " Postcode"
+    landlord_property_id = "UPRN"
+    fulladdress_column = " Address"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    address1_method = None
+    landlord_year_built = "Property Age"
+    landlord_os_uprn = None
+    landlord_property_type = "Property Type"
+    landlord_wall_construction = "Landlord Wall Full"
+    landlord_heating_system = "Landlord Heating"
+    landlord_existing_pv = None
+    outcomes_filename = "plus dane outcomes.xlsx"
+    outcomes_sheetname = "EVERYTHING"
+    outcomes_postcode = "Post Code"
+    outcomes_houseno = "Numb."
+    master_filepaths = [
+        os.path.join(data_folder, "JJC Rolling Master.csv"),
+        os.path.join(data_folder, "SCIS Rolling Master.csv"),
+    ]
+    master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv")
+
+    # Maps addresses to uprn in problematic cases
+    manual_uprn_map = {}
+
+    asset_list = AssetList(
+        local_filepath=os.path.join(data_folder, data_filename),
+        header=0,
+        sheet_name=sheet_name,
+        address1_colname=address1_column,
+        postcode_colname=postcode_column,
+        landlord_property_id=landlord_property_id,
+        full_address_colname=fulladdress_column,
+        full_address_cols_to_concat=address_cols_to_concat,
+        missing_postcodes_method=missing_postcodes_method,
+        address1_extraction_method=address1_method,
+        landlord_year_built=landlord_year_built,
+        landlord_uprn=landlord_os_uprn,
+        landlord_property_type=landlord_property_type,
+        landlord_built_form=landlord_built_form,
+        landlord_wall_construction=landlord_wall_construction,
+        landlord_roof_construction=landlord_roof_construction,
+        landlord_heating_system=landlord_heating_system,
+        landlord_existing_pv=landlord_existing_pv,
+        landlord_sap=landlord_sap,
+        phase=phase
+    )
+    asset_list.init_standardise()
+
+    # We produce the new maps, which can be saved for future usage
+    new_property_type_map = {
+        k: v for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_property_type] if
+            asset_list.landlord_property_type else {}
+        ).items()
+        if k not in PROPERTY_MAPPING
+    }
+    new_built_form_map = {
+        k: v for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_built_form] if
+            asset_list.landlord_built_form else {}
+        ).items()
+        if k not in BUILT_FORM_MAPPINGS
+    }
+    new_wall_map = {
+        k: v for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_wall_construction] if
+            asset_list.landlord_wall_construction else {}
+        ).items()
+        if k not in WALL_CONSTRUCTION_MAPPINGS
+    }
+    new_heating_map = {
+        k: v for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_heating_system] if
+            asset_list.landlord_heating_system else {}
+        ).items()
+        if k not in HEATING_MAPPINGS
+    }
+    new_existing_pv_map = {
+        k: v for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
+        ).items()
+        if k not in EXISTING_PV_MAPPINGS
+    }
+    new_roof_construction_map = {
+        k: v for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_roof_construction] if
+            asset_list.landlord_roof_construction else {}
+        ).items()
+        if k not in ROOF_CONSTRUCTION_MAPPINGS
+    }
+
+    asset_list.apply_standardiation()
+
+    # We now flag properties that have been treated under existing programmes
+    asset_list.flag_outcomes(
+        outcomes_filepath=os.path.join(data_folder, outcomes_filename) if outcomes_filename else None,
+        outcomes_sheetname=outcomes_sheetname,
+        outcomes_address=outcomes_address,
+        outcomes_postcode=outcomes_postcode,
+        outcomes_houseno=outcomes_houseno,
+        outcomes_id=outcomes_id
+    )
+
+    asset_list.flag_survey_master(
+        master_filepaths=master_filepaths,
+        master_to_asset_list_filepath=master_to_asset_list_filepath
+    )
+
+    ### We retrieve the EPC data
+
+    # We process the asset list in chunks of chunk_size rows (set below) at a time
+    # Create the chunks directory
+    epc_api_only = False
+    force_retrieve_data = False
+    skip = None  # Used to skip already completed chunks
+    chunk_size = 1000
+    filename = "Chunk {i}.csv"
+    download_folder = os.path.join(data_folder, "Chunks")
+    if not os.path.exists(download_folder):
+        os.makedirs(download_folder)
+
+    chunk_indexes = list(range(0, len(asset_list.standardised_asset_list), chunk_size))
+    downloaded_files = {filename.format(i=i) for i in chunk_indexes}
+
+    # We check if we have files associated to these files already and if we do, and we do not want to force the
+    # fetching of the data, we skip
+    folder_contents = os.listdir(download_folder)
+    if all(x in folder_contents for x in downloaded_files):
+        skip = max(chunk_indexes)
+
+    if any(x in folder_contents for x in downloaded_files):
+        skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
+
+    for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
+        print(f"Processing chunk {i} to {i + chunk_size}")
+        if skip is not None and not force_retrieve_data:
+            if i <= skip:
+                continue
+        chunk = asset_list.standardised_asset_list[i:i + chunk_size]
+        epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
+            df=chunk,
+            row_id_name=asset_list.DOMNA_PROPERTY_ID,
+            uprn_column=AssetList.STANDARD_UPRN,
+            fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
+            address1_column=AssetList.STANDARD_ADDRESS_1,
+            postcode_column=AssetList.STANDARD_POSTCODE,
+            property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
+            built_form_column=AssetList.STANDARD_BUILT_FORM,
+            manual_uprn_map=manual_uprn_map,
+            epc_api_only=epc_api_only,
+            epc_auth_token=EPC_AUTH_TOKEN
+        )
+
+        # We now retrieve any failed properties
+        chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
+        epc_data_failed, _, _ = get_data(
+            df=chunk_failed,
+            row_id_name=asset_list.DOMNA_PROPERTY_ID,
+            uprn_column=AssetList.STANDARD_UPRN,
+            fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
+            address1_column=AssetList.STANDARD_ADDRESS_1,
+            postcode_column=AssetList.STANDARD_POSTCODE,
+            property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
+            built_form_column=AssetList.STANDARD_BUILT_FORM,
+            manual_uprn_map=manual_uprn_map,
+            epc_api_only=epc_api_only,
+            epc_auth_token=EPC_AUTH_TOKEN
+        )
+
+        epc_data_chunk.extend(epc_data_failed)
+
+        # Append the failed data to the main data
+        # Store the chunk locally as a csv
+        pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
+        # Store the errors and no-data locally
+        with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
+            json.dump(errors_chunk, f)
+
+        with open(os.path.join(data_folder, f"Chunks/Chunk {i} nodata.csv"), "w") as f:
+            json.dump(no_epc_chunk, f)
+
+    # We read in and concatenate the created chunks
+    # List the contents
+    epc_data = []
+    for file in downloaded_files:
+        csv_data = pd.read_csv(os.path.join(download_folder, file))
+        # We need to convert the recommendations back to a list
+        csv_data["recommendations"] = csv_data["recommendations"].apply(eval)
+        # We don't have this if we didn't run the pulling from find my epc
+        if "find_my_epc_data" in csv_data.columns:
+            csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
+        epc_data.append(csv_data)
+
+    epc_df = pd.concat(epc_data)
+    epc_df["estimated"] = epc_df["estimated"].fillna(False)
+
+    # We expand out the recommendations
+    recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]]
+
+    unique_recommendations = set()
+    for _, row in recommendations_df.iterrows():
+        unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
+
+    columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
+    transformed_data = []
+    for _, row in recommendations_df.iterrows():
+        # Initialize a dictionary for this row with False for all recommendations
+        row_data = {col: False for col in columns}
+        row_data[asset_list.DOMNA_PROPERTY_ID] = row[asset_list.DOMNA_PROPERTY_ID]
+
+        # Set True for each recommendation present in this row
+        for rec in row["recommendations"]:
+            recommendation_text = rec["improvement-summary-text"]
+            row_data[recommendation_text] = True
+
+        # Append the row data to transformed_data
+        transformed_data.append(row_data)
+
+    transformed_df = pd.DataFrame(transformed_data)
+    transformed_df = transformed_df[
+        [
+            asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
+            "Floor insulation", "Floor insulation (suspended floor)"
+        ]
+    ]
+
+    transformed_df["epc_has_floor_recommendation"] = (
+        transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
+        transformed_df["Floor insulation (suspended floor)"]
+    )
+
+    # Get the find my epc data
+    if "find_my_epc_data" not in epc_df.columns:
+        epc_df["find_my_epc_data"] = None
+
+    find_my_epc_data = []
+    for _, x in epc_df.iterrows():
+        if x["find_my_epc_data"]:
+            find_my_epc_data.append(
+                {
+                    asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
+                    **x["find_my_epc_data"]
+                }
+            )
+        else:
+            find_my_epc_data.append(
+                {
+                    asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
+                }
+            )
+
+    find_my_epc_data = pd.DataFrame(find_my_epc_data)
+
+    find_my_epc_data = find_my_epc_data.merge(
+        transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
+        how="left", on=asset_list.DOMNA_PROPERTY_ID
+    )
+
+    # We check if we get the solar pv column:
+    if "Solar photovoltaics" not in find_my_epc_data.columns:
+        find_my_epc_data["Solar photovoltaics"] = False
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
+        ].rename(
+        columns=asset_list.EPC_API_DATA_NAMES
+    )
+
+    # Look for columns not in the find my EPC data, which will have happened if we didn't
+    # retrieve it in the first place
+    missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
+    if missed_find_epc_cols:
+        for c in missed_find_epc_cols:
+            find_my_epc_data[c] = None
+
+    epc_df = epc_df.merge(
+        find_my_epc_data[
+            [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
+            ]
+        .rename(columns=asset_list.FIND_EPC_DATA_NAMES),
+        how="left",
+        on=asset_list.DOMNA_PROPERTY_ID
+    )
+
+    asset_list.merge_data(epc_df)
+
+    asset_list.extract_attributes()
+
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
+    asset_list.identify_worktypes(cleaned)
+
+    pprint(asset_list.work_type_figures)
+
+    asset_list.flat_analysis()
+
+    ################################################################
+    # WESTWARD - comparison between Kieran's method & automated
+    ################################################################
+
+    # Check 1)
+    cavity_fills = pd.read_excel(
+        os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
+        sheet_name="Straight Fill"
+    )
+    cavity_fills = cavity_fills.merge(
+        asset_list.standardised_asset_list[
+            [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"]
+        ],
+        how="left",
+        left_on=asset_list.landlord_property_id,
+        right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
+    )
+    cavity_fills["cavity_reason"] = cavity_fills["cavity_reason"].fillna("Not identified")
+    print(cavity_fills["cavity_reason"].value_counts())
+    # Didn't identify 3 properties because they're bedsits
+    # 4 properties were identified, not based on the non-intrusives but instead because
+    # Westward said they were built in 2003/2007. Have adjusted this to use the age from the
+    # epc as well, as EPC says 1975 and they look like 1975 properties
+    # 37 properties flagged as already having solar - these are all because the landlord said they have solar
+    # e.g.
+    # https://earth.google.com/web/search/11+Winsland+Avenue+TOTNES+TQ9+5FT/@50.43354465,-3.71318276,46.57468503a,
+    # 59.14004365d,35y,0h,0t,
+    # 0r/data=CpABGmISXAolMHg0ODZkMWQxOGE4NWRiZjdkOjB4YjBhM2E5M2Q3YWVlMWEwYhlZYgp7fzdJQCHFfC9027QNwCohMTEgV2luc2xhbmQgQXZlbnVlIFRPVE5FUyBUUTkgNUZUGAIgASImCiQJbxsQEoo3SUARXQcp_HE3SUAZBmiZGJ6yDcAhCA0fqq63DcBCAggBOgMKATBCAggASg0I____________ARAA
+    # https://earth.google.com/web/search/15+St+Anne%27s+Ct,+Newton+Abbot+TQ12+1TL/@50.53068337,-3.61611128,
+    # 11.74908956a,135.73212429d,35y,0h,0t,
+    # 0r/data=CpUBGmcSYQolMHg0ODZkMDVkMjFhODhjZjgxOjB4MjBmMzE2Zjc3MGI2NGMwYxlCxHLw8UNJQCFZqyzALe4MwComMTUgU3QgQW5uZSdzIEN0LCBOZXd0b24gQWJib3QgVFExMiAxVEwYAiABIiYKJAm-r6U2iDdJQBHS5ICRdDdJQBmYGVpmiLINwCG8wcrtqbYNwEICCAE6AwoBMEICCABKDQj___________8BEAA
+
+    # Check 2)
+    cavity_fills_with_solar = pd.read_excel(
+        os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
+        sheet_name="Solar PV - Straight Fill"
+    )
+    cavity_fills_with_solar = cavity_fills_with_solar.merge(
+        asset_list.standardised_asset_list[
+            [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"]
+        ],
+        how="left",
+        left_on=asset_list.landlord_property_id,
+        right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
+    )
+    cavity_fills_with_solar["cavity_reason"] = cavity_fills_with_solar["cavity_reason"].fillna("Not identified")
+    print(cavity_fills_with_solar["cavity_reason"].value_counts())
+    # 203 properties total
+    # 140 properties were flagged up based on non-intrusives (Non-Intrusive Data Showed Empty Cavity)
+    # 63 property already has solar
+
+    # Check 3) RDF
+    rdf = pd.read_excel(
+        os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
+        sheet_name="RDF CIGA checks"
+    )
+    rdf = rdf.merge(
+        asset_list.standardised_asset_list[
+            [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"]
+        ],
+        how="left",
+        left_on=asset_list.landlord_property_id,
+        right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
+    )
+    rdf["cavity_reason"] = rdf["cavity_reason"].fillna("Not identified")
+    print(rdf["cavity_reason"].value_counts())
+    # 264 properties are not identified, 261 of which are due to the fact they contain materials
+    # The other 3 were determined to be eligible for solar instead
+    # Many of these units that were identified for rdf works could be solar jobs
+
+    rdf_with_solar = pd.read_excel(
+        os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
+        sheet_name="Solar PV - RDF CIGA Checks"
+    )
+    rdf_with_solar = rdf_with_solar.merge(
+        asset_list.standardised_asset_list[
+            [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"]
+        ],
+        how="left",
+        left_on=asset_list.landlord_property_id,
+        right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
+    )
+    rdf_with_solar["cavity_reason"] = rdf_with_solar["cavity_reason"].fillna("Not identified")
+    rdf_with_solar["cavity_reason"].value_counts()
+
+    # All others identified - some flagged as empties due to EPC or landlord data suggesting as much
+    # 5 not identified due to containing COMPACTED BEAD
+
+    asset_list.standardised_asset_list = asset_list.standardised_asset_list[
+        asset_list.standardised_asset_list[asset_list.landlord_property_id]
+    ]
+
+    asset_list.load_contact_details(
+        local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
+        sheet_name="Report 1",
+        landlord_property_id=asset_list.landlord_property_id,
+        phone_number_column='Property Current Tel. Number',
+        fullname_column='Proeprty Current Occupant',
+        firstname_column=None,
+        lastname_column=None,
+        email_column=None,  # TODO - we need this
+    )
+
+    # Convert to a format suitable for CRM
+    # TODO: TEMP
+    assigned_surveyors = pd.DataFrame(
+        [
+            {
+                asset_list.landlord_property_id: "02610001",
+                "week_commencing": "10/10/2025",
+                "surveyor_name": "Khalim Conn-Kowlessar",
+                "surveyor_email": "khalim@domna.homes",
+            }
+        ]
+    )
+
+    # TODO: Sort the output by postcode
+
+    company_domain = "ealing.gov.uk"
+    crm_pipeline_name = "Survey Management"
+    first_dealstage = "READY TO BEGIN SCHEDULING"
+    # TODO - temp, upload to either SharePoint or AWS
+
+    asset_list.prepare_for_crm(
+        assigned_surveyors=assigned_surveyors,
+        company_domain=company_domain,
+        crm_pipeline_name=crm_pipeline_name,
+        first_dealstage=first_dealstage
+    )
+    hubspot_data = asset_list.hubspot_data
+
+    # Store as an excel
+    filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
+    # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
+
+    with pd.ExcelWriter(filename) as writer:
+        asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+        asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
+        # If we have outcomes, we add a tab with the outcomes
+        if not asset_list.outcomes_for_output.empty:
+            asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
+
+        if not asset_list.unmatched_submissions.empty:
+            asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
+
+        if not asset_list.outcomes_no_match.empty:
+            asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
+
+    # Store the Hubspot export as a csv
+    hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@ -0,0 +1,148 @@
+import numpy as np
+
+STANDARD_BUILT_FORMS = {  # closed vocabulary of standardised built-form labels
+    # Houses, described by how they adjoin neighbouring buildings
+    "detached", "semi-detached", "end-terrace", "mid-terrace",
+    # Flats, described by the storey they occupy
+    "basement", "ground floor", "mid-floor", "top-floor",
+    "unknown",  # label BUILT_FORM_MAPPINGS assigns to missing / unclassifiable raw values
+}
+
+BUILT_FORM_MAPPINGS = {  # raw landlord built-form label (exact match) -> value from STANDARD_BUILT_FORMS
+    'House (End Terrace)': 'end-terrace',
+    'Ground Floor Flat General': 'ground floor',
+    'House (Semi)': 'semi-detached',
+    'House (Mid Terrace)': 'mid-terrace',
+    'Bungalow': 'unknown',
+    'House (Mid terrace)': 'mid-terrace',
+    'Maisonette': 'unknown',
+    'Flat': 'unknown',
+    'First Floor Flat General': 'mid-floor',
+    'Bungalow (Semi)': 'semi-detached',
+
+    'Detached House': 'detached',
+    'End Terraced House': 'end-terrace',
+    'Studio (Ground floor)': 'ground floor',
+    'Mid Terraced House': 'mid-terrace',
+    'Ground Floor Flat': 'ground floor',
+    'Semi Detached House': 'semi-detached',
+    'Detached Property': 'detached',
+    'Level not confirmed': 'unknown',
+    'Bedsit': 'unknown',
+    'Cottage': 'detached',
+    'Terraced House': 'mid-terrace',
+    'Studio (1st Floor)': 'ground floor',
+    'Standard Maisonette': 'unknown',
+    'Third Floor Flat or Above': 'top-floor',
+    'Town House': 'end-terrace',
+    'Guest room in a complex': 'unknown',
+    'Back To Back House': 'mid-terrace',
+    'PIMSS EMPTY': 'unknown',
+    'Flat Basement': 'basement',
+    'House': 'unknown',
+    'Second Floor Flat': 'mid-floor',
+    'First Floor Flat': 'ground floor',  # NOTE(review): 'First Floor Flat General' maps to 'mid-floor' - confirm
+    'Room Only': 'unknown',
+
+    'End Terrace Housex': 'end-terrace',
+    'Mid Terrace Bungalow': 'mid-terrace',
+    'End Terrace Bungalow': 'end-terrace',
+    'Mid Terrace House': 'mid-terrace',
+    'Detached Bungalow': 'detached',
+    'End Terrace House': 'end-terrace',
+    'Mid Terrace Housekeeping ': 'mid-terrace',
+    'Semi Detached Bung': 'semi-detached',
+    'Guest Room': 'unknown',
+    'Coach House': 'detached',
+    'Office Buildings': 'unknown',
+    'Maisonnette': 'mid-floor',
+    'Bedspace': 'unknown',
+    'Studio (3rd floor and above)': 'top-floor',
+    'Adapted Property For Disabled': 'unknown',
+    'Studio (2nd floor)': 'mid-floor',
+    np.nan: 'unknown',  # missing value in the landlord data
+    'Third Floor Flat': 'mid-floor',
+    '2 Ext. Wall Flat': 'mid-terrace',
+    'Hostel': 'unknown',
+    'Flat: Mid Terrace: Mid Floor': 'mid-terrace',
+    'Bungalow: SemiDetached': 'semi-detached',
+    'Flat: End Terrace: Top Floor': 'end-terrace',
+    'Flat: Enclosed End Terrace: Top Floor': 'end-terrace',
+    'Maisonette: End Terrace: Ground Floor': 'end-terrace',
+    'Flat: End Terrace: Ground Floor': 'end-terrace',
+    'Flat: Mid Terrace: Top Floor': 'mid-terrace',
+    'House: Detached': 'detached',
+    'Flat: End Terrace: Mid Floor': 'end-terrace',
+    'House: SemiDetached': 'semi-detached',
+    'Flat: Semi Detached: Ground Floor': 'semi-detached',
+    'Flat: Semi Detached: Top Floor': 'semi-detached',
+    'Flat: Mid Terrace: Ground Floor': 'mid-terrace',
+    'House: MidTerrace': 'mid-terrace',
+    'House: EndTerrace': 'end-terrace',
+    'Bungalow: EndTerrace': 'end-terrace',
+    'Bungalow: MidTerrace': 'mid-terrace',
+    'Flat: Semi Detached: Mid Floor': 'semi-detached',
+    'Maisonette: Mid Terrace: Top Floor': 'mid-terrace',
+    'Flat: Enclosed Mid Terrace: Mid Floor': 'mid-terrace',
+    'Flat: Enclosed Mid Terrace: Ground Floor': 'mid-terrace',
+    'Flat: Detached: Ground Floor': 'detached',
+    'Flat: Detached: Mid Floor': 'detached',
+    'Flat: Detached: Top Floor': 'detached',
+    'Flat: Enclosed End Terrace: Mid Floor': 'end-terrace',
+    'Bungalow: Detached': 'detached',
+    'Maisonette: End Terrace: Mid Floor': 'end-terrace',
+    'Maisonette: Detached: Top Floor': 'detached',
+    'Flat: Enclosed End Terrace: Ground Floor': 'end-terrace',
+    'Flat: Enclosed Mid Terrace: Top Floor': 'mid-terrace',
+    'House: EnclosedEndTerrace': 'end-terrace',
+    '3 Ext. Wall Flat': 'semi-detached',
+    'Bungalow Detached': 'detached',
+    'Bungalow End Terrace': 'end-terrace',
+    'Bungalow Mid Terrace': 'mid-terrace',
+    'Bungalow Semi Detached': 'semi-detached',  # aligned with 'Bungalow (Semi)' / 'Bungalow: SemiDetached'
+    'Maisonette 2 Ext. Wall': 'mid-terrace',
+    'Maisonette 3 Ext. Wall': 'semi-detached',
+    'End-terrace': 'end-terrace',
+    'Mid-terrace': 'mid-terrace',
+    'Semi-detached': 'semi-detached',
+    'Detached': 'detached',
+    'Flat / maisonette': 'unknown',
+    '2014 onwards': 'unknown',
+
+    'Semi Detached': 'semi-detached',
+    'End Terraced': 'end-terrace',
+    'Basement': 'basement',
+    'No': 'unknown',
+    'Mid Terrace': 'mid-terrace',
+    'Link Detached': 'detached',
+    'Mid Terraced': 'mid-terrace',
+    'Ground Floor': 'ground floor',
+    'End Terrace': 'end-terrace',
+    'Sheltrd Semi Det': 'semi-detached',
+    'Shop': 'unknown',
+    'Fourth Floor': 'mid-floor',
+    'Terraced': 'mid-terrace',
+    'Leasehold Terr': 'mid-terrace',
+    'Room': 'unknown',
+    'Second Floor': 'mid-floor',
+    'Third Floor': 'mid-floor',
+    'Office': 'unknown',
+    'First Floor Over Arch': 'ground floor',
+    '16-25 IND-PPL': 'unknown',
+    'Seventh Floor': 'top-floor',
+    'Sheltered': 'unknown',
+    'Shelt Bung End': 'end-terrace',
+    'Room In Shared Accommodation': 'unknown',
+    'Sheltred Bung Terrace': 'mid-terrace',
+    'Garage In Block': 'unknown',
+    'First Floor': 'ground floor',
+    'First Floor Over Garage': 'ground floor',
+    'Leasehold': 'unknown',
+    'Sheltred Bung': 'unknown',
+    'Garage': 'unknown',
+    'Sixth Floor': 'top-floor',
+    'Sheltered Bung': 'semi-detached',
+    'Guest': 'unknown',
+    'Fifth Floor': 'mid-floor'
+
+}
--- a/asset_list/mappings/exising_pv.py
+++ b/asset_list/mappings/exising_pv.py
@ -0,0 +1,20 @@
+import numpy as np
+
+STANDARD_EXISTING_PV = {  # closed vocabulary of standardised existing-PV labels
+    "unknown", "no PV", "already has PV",
+}
+
+EXISTING_PV_MAPPINGS = {  # raw landlord value (exact match) -> value from STANDARD_EXISTING_PV
+    "NO": "no PV",
+    "YES": "already has PV",
+    "no": "no PV",
+    "yes": "already has PV",
+    True: "already has PV",  # boolean flags are accepted alongside the text forms
+    False: "no PV",
+    np.nan: "unknown",  # missing value in the landlord data
+    "PV: 2kWp array": "already has PV",
+    "PV: 25% roof area, PV: 3.6kWp array": "already has PV",
+    "PV: 10% roof area, PV: 2kWp array": "already has PV",
+    "PV: 50% roof area": "already has PV",
+    "Solar PV": "already has PV",
+}
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@ -0,0 +1,206 @@
+import numpy as np
+
+STANDARD_HEATING_SYSTEMS = {
+    # Closed vocabulary of standardised heating-system labels; each label is listed exactly once
+    # (repeated members in a set literal are silently collapsed, so duplicates only add noise).
+    "gas combi boiler",
+    "electric storage heaters",
+    "district heating",
+    "gas condensing boiler",
+    "oil boiler",
+    "gas condensing combi",
+    "air source heat pump",
+    "boiler - other fuel",
+    "ground source heat pump",
+    "electric radiators",
+    "other",
+    "electric boiler",
+    "unknown",
+    "communal gas boiler",
+    "high heat retention storage heaters",
+    "room heaters",
+    "electric fuel",
+    "oil fuel",
+    "solid fuel",
+    "electric ceiling",
+    "electric underfloor",
+    "no heating",
+}
+
+HEATING_MAPPINGS = {  # raw landlord heating description (exact match) -> value from STANDARD_HEATING_SYSTEMS
+    "Combi - GAS": "gas combi boiler",
+    "E7 Storage Heaters": "high heat retention storage heaters",
+    "District heating system": "district heating",
+    "Condensing Boiler - GAS": "gas condensing boiler",
+    "Boiler Oil/other": "oil boiler",
+    "Condensing Combi - Gas": "gas condensing combi",
+    "Air Source Source Heat Pump": "air source heat pump",
+    "Biomass Boiler": "boiler - other fuel",
+    "Ground Source Heat Pump": "ground source heat pump",
+    "Electric Oil filled radiators": "electric radiators",
+    "Solid Fuel": "other",
+    "LPG Boiler": "boiler - other fuel",
+    "Electric Boiler": "electric boiler",
+    "No data": "unknown",
+    "Boiler Communal/Commercial - GAS": "communal gas boiler",
+    "Eco Electric Radiators": "electric radiators",
+    "Gas fire": "other",
+    "Backboiler - Solid fuel": "other",
+    'combi - gas': 'gas combi boiler',
+    'e7 storage heaters': 'high heat retention storage heaters',
+    'district heating system': 'district heating',
+    'condensing boiler - gas': 'gas condensing boiler',
+    'boiler oil/other': 'oil boiler',
+    'condensing combi - gas': 'gas condensing combi',
+    'air source source heat pump': 'air source heat pump',
+    'biomass boiler': 'boiler - other fuel',
+    'ground source heat pump': 'ground source heat pump',
+    'electric oil filled radiators': 'electric radiators',
+    'solid fuel': 'other',
+    'lpg boiler': 'boiler - other fuel',
+    'electric boiler': 'electric boiler',
+    'no data': 'unknown', 'boiler communal/commercial - gas': 'communal gas boiler',
+    'eco electric radiators': 'electric radiators',
+    'gas fire': 'other', 'backboiler - solid fuel': 'other',
+    'ASHP': 'air source heat pump',
+    'COMMHEAT': 'communal gas boiler',
+    'GBB': 'gas combi boiler',
+    'GFS': 'gas condensing boiler',
+    'GWA': 'gas condensing boiler',
+    'GWM': 'gas condensing combi',
+    'HDU': 'district heating',
+    'OILBLR': 'oil boiler',
+    'SOLIDFUEL': 'boiler - other fuel',
+    'STORHTR': 'electric storage heaters',
+    np.nan: 'unknown',  # missing value in the landlord data
+    'Oil': 'boiler - other fuel',
+    'Gas': 'gas condensing boiler',
+    'Electric': 'electric storage heaters',
+    'Solid fuel': 'other',
+    'No Heat': 'unknown',
+    'GSHP': 'ground source heat pump',
+
+    'Boiler Oil': 'oil boiler',
+    'Boiler Electricity': 'electric boiler',
+    'Boiler ND': 'unknown',
+    'ND Mains gas': 'unknown',
+    'Room heaters Mains gas': "room heaters",
+    'Heat pump (air) Electricity': 'air source heat pump',
+    'Room heaters Electricity': 'electric radiators',
+    'Room heaters Oil': 'room heaters',
+    'No heating system ND': 'no heating',
+    'Heat pump (wet) Electricity': 'ground source heat pump',
+    'Room heaters Biomass': 'room heaters',
+    'ND Solid fuel': 'unknown',
+    'Boiler Mains gas': 'gas combi boiler',
+    'Boiler LPG': 'boiler - other fuel',
+    'Room heaters Solid fuel': 'room heaters',
+    'ND ND': 'unknown',
+    'Storage heating Electricity': 'electric storage heaters',
+    'ND Electricity': 'unknown',
+    'Community heating Community (non-gas)': 'district heating',
+    'No heating system N/A': 'no heating',
+    'Boiler Solid fuel': 'boiler - other fuel',
+    'Community heating Community (mains gas)': 'communal gas boiler',
+    'Boiler Biomass': 'boiler - other fuel',
+    'No heating system Mains gas': 'no heating',
+
+    'Storage heaters': 'electric storage heaters',
+    'Air Source': 'air source heat pump',
+    'Ground source': 'ground source heat pump',
+    'OIl': 'boiler - other fuel',
+    'Quantum storage heaters (old sh on EPC)': 'high heat retention storage heaters',
+    'Quanum Storage heaters': 'high heat retention storage heaters',
+    'Quantum storage heaters (Old SH on EPC)': 'high heat retention storage heaters',
+    'Quantum storage heaters': 'high heat retention storage heaters',
+    'Air Source (EPC says SH)': 'air source heat pump',
+    'ASHP - Was logged as oil': 'air source heat pump',
+    'Ground Source': 'ground source heat pump',
+    'District Heating': 'district heating',
+    'Mains Gas (Communal)': 'communal gas boiler',
+    'LPG': 'boiler - other fuel',
+    'Mains Gas': 'gas condensing boiler',
+    'ELECTRIC': 'electric fuel',
+    'OIL': 'oil fuel',
+    'SOLID FUEL': 'solid fuel',
+    'GAS': 'gas combi boiler',
+    'DO NOT SURVEY': 'unknown',
+    'Gas Boiler': 'gas combi boiler',
+    'Communal Gas ': 'communal gas boiler',
+    'Communal': 'communal gas boiler',
+    'Communal Gas': 'communal gas boiler',
+    'Wood Burning Boiler': "boiler - other fuel",
+    'Oil Fired Boiler': 'oil boiler',
+    'Electric (direct acting) room heaters: Panel, convector or radiant heaters Electricity: Electricity':
+        'room heaters',
+    'Electric Storage Systems: Integrated storage+direct-acting heater Electricity: Electricity':
+        'electric storage heaters',
+    'Community Heating Systems: Community CHP and boilers (RdSAP) Gas: Mains Gas (Community)': 'communal gas boiler',
+    # The three "<X> rated Regular Boiler Gas: Mains Gas" entries previously mapped to 'gas boiler', which is not a
+    'Boiler: D rated Regular Boiler Gas: Mains Gas': 'gas condensing boiler',  # member of STANDARD_HEATING_SYSTEMS;
+    'Boiler: C rated Combi Gas: Mains Gas': 'gas combi boiler',
+    'Electric Storage Systems: Fan storage heaters Electricity: Electricity': 'electric storage heaters',
+    ' ': 'unknown',
+    'Boiler: G rated Regular Boiler Gas: Mains Gas': 'gas condensing boiler',  # they now follow the fuel-less
+    'Electric Storage Systems: Modern (slimline) storage heaters Electricity: Electricity': 'electric storage heaters',
+    'Boiler: E rated Regular Boiler Gas: Mains Gas': 'gas condensing boiler',  # 'Regular Boiler' entries below.
+    'Boiler: A rated Regular Boiler Electricity: Electricity': 'electric boiler',
+    'Community Heating Systems: Community boilers only (RdSAP) Gas: Mains Gas (Community)': 'communal gas boiler',
+    'Boiler: A rated Combi Gas: Mains Gas': 'gas condensing combi',
+    'Boiler: A rated CPSU Electricity: Electricity': 'electric boiler',
+    'Heat Pump: Electric Heat pumps: Ground source heat pump with flow temperature <= 35Â°C': 'ground source heat pump',
+    'Heat Pump: Electric Heat pumps: Ground source heat pump in other cases': 'ground source heat pump',
+    'Electric Storage Systems: High heat retention storage heaters': 'high heat retention storage heaters',
+    'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35Â°C': 'air source heat pump',
+    'Electric (direct acting) room heaters: Panel, convector or radiant heaters': 'room heaters',
+    'Boiler: C rated Combi': 'gas combi boiler',
+    'Boiler: B rated Regular Boiler': 'gas condensing boiler',
+    'Boiler: E rated Combi': 'gas combi boiler',
+    'Boiler: A rated Combi': 'gas combi boiler',
+    'Boiler: E rated Regular Boiler': 'gas condensing boiler',
+    'Community Heating Systems: Community boilers only (RdSAP)': 'district heating',
+    'Boiler: C rated Regular Boiler': 'gas condensing boiler',
+    'Boiler: A rated Regular Boiler': 'gas condensing boiler',
+    'Electric Storage Systems: Fan storage heaters': 'electric storage heaters',
+    'Boiler: F rated Combi': 'gas combi boiler',
+
+    'Room heaters': 'room heaters',
+    'Room Heaters': 'room heaters',
+    'Boiler': 'gas condensing boiler',
+    'Heat Pump (Wet)': 'air source heat pump',
+    'Community Heating': 'district heating',
+    'Heat pump (wet)': 'air source heat pump',
+    'Electric ceiling heating': 'electric ceiling',
+    'Electric under floor heating': 'electric underfloor',
+    'Community heating': 'district heating',
+
+    'Wet - Radiators Air Source Heat Pump': 'air source heat pump',
+    'Wet - Radiators Electric': 'electric boiler',
+    'Storage Heaters': 'high heat retention storage heaters',
+    'Wet - Radiators Oil': 'oil boiler',
+    'Communal Wet - Radiators Gas': 'communal gas boiler',
+    'Electric - Storage/Panel Heaters Electric': 'electric storage heaters',
+    'Gas Central Heating': 'gas combi boiler',
+    'Wet - Radiators Solar': 'other',
+    'Electric - Storage/Panel Heaters LPG': 'electric storage heaters',
+    'No Heating Solid': 'no heating',
+    'Wet - Underfloor Gas': 'gas condensing boiler',
+    'No Heating Electric': 'no heating',
+    'Oil Fired Central Heating': 'oil boiler',
+    'Warm Air Gas': 'other',
+    'Communal Boilers': 'communal gas boiler',
+    'Wet - Radiators Gas': 'gas combi boiler',
+    'Wet - Radiators Solid': 'solid fuel',
+    'Wet - Radiators LPG': 'other',
+    'No Heating Gas': 'no heating',
+    'No Heating': 'no heating',
+    'Panel Heaters': 'electric radiators',
+    'Rointe Electric Heating': 'electric storage heaters',
+    'Underfloor Heating': 'electric underfloor',
+    'Air Source Heating': 'air source heat pump',
+    'Warm Air Electric': 'other',
+    'Communal Wet - Radiators Electric': 'communal gas boiler',
+    'Wet - Underfloor Solar': 'other',
+    'No Heating Required Gas': 'unknown',
+    'Electric - Storage/Panel Heaters Gas': 'electric storage heaters',
+    'Electric - Storage/Panel Heaters Solid': 'electric storage heaters'
+}
--- a/asset_list/mappings/property_type.py
+++ b/asset_list/mappings/property_type.py
@ -0,0 +1,182 @@
+import numpy as np
+
+# The closed set of standardised property-type labels.
+STANDARD_PROPERTY_TYPES = {
+    "house",
+    "flat",
+    "maisonette",
+    "bungalow",
+    "park home",
+    "block house",
+    "bedsit",
+    "coach house",
+    "unknown",
+    "other",
+    "block of flats",
+}
+
+# Lookup of raw property-type values, exactly as they have appeared in source data,
+# to their standardised label. Keys are matched verbatim, so case variants and
+# trailing whitespace in the keys below are intentional and must be kept as-is.
+PROPERTY_MAPPING = {
+    # Short codes
+    "HOUSE": "house",
+    "FLAT": "flat",
+    "MAISONET": "maisonette",
+    "BUNGALOW": "bungalow",
+    "BLKHOUS": "block house",
+    "blkhous": "block house",
+    "BEDSIT": "bedsit",
+    "COACHSE": "coach house",
+    "coachse": "coach house",
+    "Admin Unit Type": "unknown",
+    "Block": "block of flats",
+    "Bungalow": "bungalow",
+    "Flat": "flat",
+    "House": "house",
+    "Maisonette": "maisonette",
+    "Stairwell": "other",
+    "MAISON": "maisonette",
+    # "<n> Bed <form> <type>" style descriptions
+    "3 Bed Semi Detached House": "house",
+    "3 Bed Mid Terrace House": "house",
+    "2 Bed Semi Detached House": "house",
+    "4 Bed Semi Detached House": "house",
+    "2 Bed End Terrace House": "house",
+    "1 Bed Sheltered Bungalow": "bungalow",
+    "1 Bed 1st Floor Sheltered Flat": "flat",
+    "2 Bed Second Floor Flat": "flat",
+    "1 Bed Mid Terrace House": "house",
+    "1 Bed End Terrace House": "house",
+    "7 Bed Detached House": "house",
+    "4 Bed End Terrace House": "house",
+    "1 Bed Link House": "house",
+    "1 Bed Second Floor Flat": "flat",
+    "2 Bed Detached House": "house",
+    "1 Bed Ground Floor Flat": "flat",
+    "2 Bed Sheltered Bungalow": "bungalow",
+    "4 Bed Mid Terrace House": "house",
+    "2 Bed Mid Terrace House": "house",
+    "2 Bed First Floor Flat": "flat",
+    "3 Bed Detached House": "house",
+    "Ground Floor Bedsit": "bedsit",
+    "3 Bed Bungalow": "bungalow",
+    # Missing values; NaN only matches a lookup made with this same np.nan object
+    np.nan: "unknown",
+    "5 Bed End Terrace House": "house",
+    "1 Bed Grd Floor Sheltered Flat": "flat",
+    "3 Bed End Terrace House": "house",
+    "2 Bed Second Floor Maisonette": "maisonette",
+    "2 Bed Ground Floor Flat": "flat",
+    "2 Bed First Floor Maisonette": "maisonette",
+    "4 Bed Detached House": "house",
+    "1 Bed Bungalow": "bungalow",
+    "2 Bed Bungalow": "bungalow",
+    "First Floor Bedsit": "bedsit",
+    "3 Bed First Floor Maisonette": "maisonette",
+    "2 Bed 1st Floor Sheltered Flat": "flat",
+    "1 Bed First Floor Flat": "flat",
+    "3 Bed First Floor Flat": "flat",
+    "ND": "unknown",
+    # Free-text descriptions
+    "House (Mid Terrace)": "house",
+    "First Floor Flat General": "flat",
+    "House (End Terrace)": "house",
+    "House (Mid terrace)": "house",
+    "Bungalow (Semi)": "bungalow",
+    "Ground Floor Flat General": "flat",
+    "House (Semi)": "house",
+    "Detached House": "house",
+    "Bedsit": "bedsit",
+    "Terraced House": "house",
+    "Standard Maisonette": "maisonette",
+    "End Terraced House": "house",
+    "Third Floor Flat or Above": "flat",
+    "Town House": "house",
+    "Mid Terraced House": "house",
+    "Back To Back House": "house",
+    "Flat Basement": "flat",
+    "Ground Floor Flat": "flat",
+    "Semi Detached House": "house",
+    "Second Floor Flat": "flat",
+    "First Floor Flat": "flat",
+    "Level not confirmed": "flat",
+    "Cottage": "house",
+    "Studio (1st Floor)": "flat",
+    "Studio (Ground floor)": "flat",
+    "Guest room in a complex": "other",
+    # NOTE(review): looks like a placeholder value rather than a bedsit - confirm
+    "PIMSS EMPTY": "bedsit",
+    "Room Only": "other",
+    "Detached Property": "house",
+    "End Terrace Housex": "house",
+    "Coach House": "coach house",
+    "Mid Terrace Bungalow": "bungalow",
+    "End Terrace Bungalow": "bungalow",
+    "Mid Terrace House": "house",
+    "Detached Bungalow": "bungalow",
+    "End Terrace House": "house",
+    "Mid Terrace Housekeeping ": "house",
+    "Maisonnette": "maisonette",
+    # NOTE(review): "Guest room in a complex" maps to "other" above - confirm which is intended
+    "Guest Room": "unknown",
+    # NOTE(review): "Office" maps to "other" further down - confirm which is intended
+    "Office Buildings": "unknown",
+    "Semi Detached Bung": "bungalow",
+    "Bedspace": "bedsit",
+    "Houses/Bungalows": "bungalow",
+    "Bedsits": "bedsit",
+    "Unknown": "unknown",
+    "Sheltered Flats/besits": "flat",
+    "House/Bungalow  ": "bungalow",
+    "Low/Med Rise Flats/Mais": "flat",
+    "Staff/Comm": "other",
+    "A Rooms": "other",
+    "Studio (3rd floor and above)": "flat",
+    "Adapted Property For Disabled": "unknown",
+    "Studio (2nd floor)": "flat",
+    "Third Floor Flat": "flat",
+    "2 Ext. Wall Flat": "flat",
+    "Hostel": "other",
+    # "<Type>: <Form>[: <Floor>]" style descriptions
+    "House: MidTerrace": "house",
+    "House: EndTerrace": "house",
+    "Flat: Mid Terrace: Mid Floor": "flat",
+    "Bungalow: SemiDetached": "bungalow",
+    "Bungalow: EndTerrace": "bungalow",
+    "Flat: End Terrace: Top Floor": "flat",
+    "Maisonette: End Terrace: Ground Floor": "maisonette",
+    "Flat: End Terrace: Ground Floor": "flat",
+    "Flat: Mid Terrace: Top Floor": "flat",
+    "House: Detached": "house",
+    "Flat: End Terrace: Mid Floor": "flat",
+    "House: SemiDetached": "house",
+    "Flat: Semi Detached: Ground Floor": "flat",
+    "Flat: Semi Detached: Top Floor": "flat",
+    "Flat: Mid Terrace: Ground Floor": "flat",
+    "Bungalow: MidTerrace": "bungalow",
+    "Flat: Enclosed End Terrace: Top Floor": "flat",
+    "Flat: Semi Detached: Mid Floor": "flat",
+    "Maisonette: Mid Terrace: Top Floor": "maisonette",
+    "House: EnclosedEndTerrace": "house",
+    "Flat: Detached: Ground Floor": "flat",
+    "Flat: Detached: Mid Floor": "flat",
+    "Flat: Detached: Top Floor": "flat",
+    "Bungalow: Detached": "bungalow",
+    "Maisonette: End Terrace: Mid Floor": "maisonette",
+    "Maisonette: Detached: Top Floor": "maisonette",
+    "Flat: Enclosed Mid Terrace: Mid Floor": "flat",
+    "Flat: Enclosed Mid Terrace: Ground Floor": "flat",
+    "Flat: Enclosed End Terrace: Mid Floor": "flat",
+    "Flat: Enclosed End Terrace: Ground Floor": "flat",
+    "Flat: Enclosed Mid Terrace: Top Floor": "flat",
+    "2013 onwards": "unknown",
+
+    # Abbreviations and other values (including stray single characters)
+    "House 2 Storey": "house",
+    "Bung": "bungalow",
+    "House 3 Storey": "house",
+    "Shared Flat": "flat",
+    "d": "unknown",
+    "Mais": "maisonette",
+    "e": "unknown",
+    "Shared House": "house",
+    "House 4 Storey": "house",
+    "Shared Bungalow": "bungalow",
+    "Detch": "house",
+    "Shop": "other",
+    "Terr": "house",
+    "Terrace": "house",
+    "Description": "unknown",
+    "Hse": "house",
+    "Room": "other",
+    "Office": "other",
+    "Room In Shared Accommodation": "other",
+    "Apartment": "flat",
+    "m": "unknown",
+    "Garage": "other",
+    "Parking Space": "other",
+    "Community Centre": "other",
+    "Communal Facility": "other",
+    "Semi": "house",
+}
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@ -0,0 +1,27 @@
+import numpy as np
+
+# The closed set of standardised roof-construction labels.
+STANDARD_ROOF_CONSTRUCTIONS = {
+    "pitched access to loft",
+    "pitched no access to loft",
+    "pitched unknown access to loft",
+    # Previously misspelled as "piched unknown insulation"
+    "pitched unknown insulation",
+    "pitched insulated",
+    "another dwelling above",
+    "flat unknown insulation",
+    "unknown insulated",
+    "unknown",
+}
+
+# Lookup of raw roof descriptions, as seen in source data, to their standardised label.
+ROOF_CONSTRUCTION_MAPPINGS = {
+    "Flat": "flat unknown insulation",
+    "Pitched (access to loft)": "pitched access to loft",
+    "Pitched (no access to loft)": "pitched no access to loft",
+    "Another dwelling above": "another dwelling above",
+    "Same dwelling above": "another dwelling above",
+    # Values that carry no usable roof information
+    "As-built": "unknown",
+    "ND (inferred)": "unknown",
+    "2018 onwards": "unknown",
+    "Pitched (vaulted ceiling)": "pitched insulated",
+    # Missing values
+    np.nan: "unknown",
+    None: "unknown",
+}
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@ -0,0 +1,170 @@
+import numpy as np
+
+# The closed set of standardised wall-construction labels.
+STANDARD_WALL_CONSTRUCTIONS = {
+    # Cavity
+    "uninsulated cavity",
+    "filled cavity",
+    "partial insulated cavity",
+    "cavity unknown insulation",
+    # Solid brick
+    "uninsulated solid brick",
+    "insulated solid brick",
+    "solid brick unknown insulation",
+    # Timber frame
+    "timber frame unknown insulation",
+    "insulated timber frame",
+    "uninsulated timber frame",
+    # Everything else
+    "system built",
+    "granite or whinstone",
+    "other",
+    "unknown",
+    "sandstone or limestone",
+    "cob",
+    "new build - average thermal transmittance",
+}
+
+# Lookup of raw wall descriptions, exactly as they have appeared in source data, to
+# their standardised label. Keys are matched verbatim, so the mis-encoded unit
+# suffixes ("W/m?K", "W/m-¦K", "W/m&#0178;K"), case variants and trailing spaces
+# below are intentional and must be kept as-is.
+#
+# Every value must be one of the standard wall-construction labels. Timber-frame
+# entries therefore use the three timber-frame labels rather than a bare
+# 'timber frame', which is not a standard label.
+WALL_CONSTRUCTION_MAPPINGS = {
+    "New Build - Average Thermal Transmittance": "new build - average thermal transmittance",
+    'Average thermal transmittance 0.25 W/m?K': 'unknown',
+    'Cavity wall, as built, insulated (assumed)': 'filled cavity',
+    'Average thermal transmittance 0.31 W/m?K': 'unknown',
+    'Cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
+    'Average thermal transmittance 0.30 W/m?K': 'unknown', 'Average thermal transmittance 0.28 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.25 W/m-¦K': 'unknown', 'Average thermal transmittance 0.21 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.20 W/m-¦K': 'unknown', 'Average thermal transmittance 0.29 W/m?K': 'unknown',
+    'Average thermal transmittance 0.16 W/m?K': 'unknown',
+    'Average thermal transmittance 0.27 W/m&#0178;K': 'unknown',
+    'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.18 W/m?K': 'unknown',
+    'Granite or whin, with internal insulation': 'granite or whinstone',
+    "Granite or whinstone, as built, insulated (assumed)": "granite or whinstone",
+    'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown',
+    'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown',
+    'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown',
+    'Average thermal transmittance 0.64 W/m?K': 'unknown', 'Average thermal transmittance 0.61 W/m?K': 'unknown',
+    'Sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
+    'Average thermal transmittance 0.33 W/m?K': 'unknown',
+    'Cavity wall,': "cavity unknown insulation",
+    'Cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
+    'Average thermal transmittance 0.29 W/m-¦K': 'unknown', 'Average thermal transmittance 0.32 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.19 W/m-¦K': 'unknown', 'Average thermal transmittance 0.27 W/m?K': 'unknown',
+    'Average thermal transmittance 0.22 W/m?K': 'unknown', 'Average thermal transmittance 0.38 W/m?K': 'unknown',
+    'Average thermal transmittance 0.26 W/m?K': 'unknown', 'Average thermal transmittance 0.27 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.18 W/m-¦K': 'unknown', 'Average thermal transmittance = 0.27 W/m?K': 'unknown',
+    'Cavity wall, with external insulation': 'filled cavity', 'Average thermal transmittance 0.21 W/m?K': 'unknown',
+    'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown',
+    'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown',
+    'Cavity wall, with internal insulation': 'filled cavity',
+    'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown',
+    # Lower-cased variants of the entries above
+    'new build - average thermal transmittance': 'new build - average thermal transmittance',
+    'average thermal transmittance 0.25 w/m?k': 'unknown',
+    'cavity wall, as built, insulated (assumed)': 'filled cavity',
+    'average thermal transmittance 0.31 w/m?k': 'unknown',
+    'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
+    'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown',
+    'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m&#0178;k': 'unknown',
+    'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.18 w/m?k': 'unknown',
+    'granite or whin, with internal insulation': 'granite or whinstone',
+    'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown',
+    'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown',
+    'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown',
+    'average thermal transmittance 0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown',
+    'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
+    'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': "cavity unknown insulation",
+    'cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
+    'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown',
+    'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown',
+    'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown',
+    'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown',
+    'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown',
+    'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown',
+    'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.28 w/m?k': 'unknown',
+    'Cavity wall, filled cavity': 'filled cavity',
+    'Cavity wall, filled cavity and external insulation': 'filled cavity',
+    'Granite or whinstone, as built, no insulation (assumed)': 'granite or whinstone',
+    'Solid brick, as built, insulated (assumed)': 'insulated solid brick',
+    'Solid brick, as built, no insulation (assumed)': 'uninsulated solid brick',
+    'Solid brick, with external insulation': 'insulated solid brick',
+    'Solid brick, with internal insulation': 'insulated solid brick',
+    'System built, as built, insulated (assumed)': 'system built',
+    'System built, as built, no insulation (assumed)': 'system built',
+    'System built, with external insulation': 'system built',
+    'System built, with internal insulation': 'system built',
+    'Timber frame, as built, insulated (assumed)': 'insulated timber frame',
+    'Timber frame, as built, no insulation (assumed)': 'uninsulated timber frame',
+    # There is no standard "partially insulated timber frame" label, so this falls
+    # back to the unknown-insulation label rather than over- or under-stating it.
+    'Timber frame, as built, partial insulation (assumed)': 'timber frame unknown insulation',
+    'Timber frame, with additional insulation': 'insulated timber frame',
+    'CAVITY': 'cavity unknown insulation',
+    'COMB': 'unknown',
+    'NONE': 'unknown',
+    'NOTKNOWN': 'unknown',
+    'SOLID': 'solid brick unknown insulation',
+    # NaN only matches a lookup made with this same np.nan object
+    np.nan: 'unknown',
+    'RENDER/TIMBER FRAME': 'timber frame unknown insulation',
+    'SYSTEM BUILT': 'system built',
+    'PCC PANELS': 'other',
+    'NOT APPLICABLE - FLAT': 'unknown',
+    'BRICK/TIMBER FRAME': 'timber frame unknown insulation',
+    'BRICK/BLOCK CAVITY': 'cavity unknown insulation',
+    'STONE SOLID': 'sandstone or limestone',
+    'EXT CLADDING SYSTEM': 'system built',
+    'BRICK/BLOCK SOLID': 'solid brick unknown insulation',
+
+    'Cavity Filled cavity (with internal/external)': 'filled cavity',
+    'ND (inferred) Filled cavity': 'filled cavity',
+    'Cavity Filled cavity': 'filled cavity',
+    'Cavity Unknown insulation': 'cavity unknown insulation',
+    'Timber frame As-built': 'timber frame unknown insulation',
+    'System build Unknown insulation': 'system built',
+    'Cavity As-built': 'uninsulated cavity',
+    'System build External': 'system built',
+    'ND (inferred) ND (inferred)': 'unknown',
+    'Solid brick External': 'insulated solid brick',
+    'Cavity External': 'filled cavity',
+    'System build As-built': 'system built',
+    'Solid brick Internal': 'insulated solid brick',
+    'Cavity Internal': 'filled cavity',
+    'System build Internal': 'system built',
+    'Solid brick As-built': 'solid brick unknown insulation',
+
+    'Cavity ': 'cavity unknown insulation',
+    'Solid brick ': 'solid brick unknown insulation',
+    'Timber frame Timber frame (good insulation)': 'insulated timber frame',
+    ' ': 'unknown',
+    'Cavity No data': 'cavity unknown insulation',
+    'Non trad ': 'other',
+    'Solid brick / Multiple Attributes ': 'solid brick unknown insulation',
+    'Cavity Believe CWI done by Dyson': 'filled cavity',
+    'Cavity CWI required': 'uninsulated cavity',
+    'Solid brick EWI installed': 'insulated solid brick',
+    'Cavity Cavity batts': 'filled cavity',
+    'Cavity CWI Completed by Dyson': 'filled cavity',
+    None: "unknown",
+    "Cavity": "cavity unknown insulation",
+    'SolidBrick: Unknown': 'solid brick unknown insulation',
+    'Cavity: Unknown': 'cavity unknown insulation',
+    'Cavity: AsBuilt (Post 1995)': 'filled cavity',
+    'Cavity: AsBuilt (1976-1982)': 'cavity unknown insulation',
+    'SystemBuilt: AsBuilt': 'system built',
+    'TimberFrame: AsBuilt': "timber frame unknown insulation",
+    'Cavity: AsBuilt (1983-1995)': 'cavity unknown insulation',
+    'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity',
+    'SolidBrick: AsBuilt': 'solid brick unknown insulation',
+    'Cavity: FilledCavity': 'filled cavity',
+    'SolidBrick: Internal': 'insulated solid brick',
+    'Cavity: External': 'filled cavity',
+    'Sandstone: Internal': 'sandstone or limestone',
+    'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation',
+    'System build': 'system built',
+    'Solid brick': 'solid brick unknown insulation',
+    'Stone': 'sandstone or limestone',
+    'Timber frame': 'timber frame unknown insulation',
+    '2017 onwards': 'new build - average thermal transmittance',
+    'ND (inferred)': 'unknown',
+    'Flat / maisonette': 'other',
+
+    'Other': 'other',
+    'Timber Frame': 'timber frame unknown insulation',
+    'Cavity Wall': 'cavity unknown insulation',
+    'Non-Traditional': 'system built',
+    'PRC': 'system built',
+    'Cross Wall': 'system built',
+    'Solid Wall': 'solid brick unknown insulation',
+    'Traditional': 'other'
+}
--- a/asset_list/requirements.txt
+++ b/asset_list/requirements.txt
@ -0,0 +1,12 @@
+postal
+pandas
+usaddress
+pydantic-settings==2.6.0
+epc-api-python==1.0.2
+fuzzywuzzy
+boto3
+openpyxl
+openai
+tiktoken
+msgpack
+beautifulsoup4
--- a/asset_list/tests/test_standardisation.py
+++ b/asset_list/tests/test_standardisation.py
@ -0,0 +1,5 @@
+from asset_list.AssetList import AssetList
+
+
+def test_multi_unit_address_flagging():
+    """An address describing a block with a range of rooms is flagged as multi-unit."""
+    block_address = 'Block (Rooms 1-4), 23 Clifton Hill, Newtown, Exeter, EX1 2DL'
+    flagged = AssetList._identify_multi_address(block_address)
+    assert flagged
--- a/asset_list/utils.py
+++ b/asset_list/utils.py
@ -0,0 +1,183 @@
+import time
+import numpy as np
+import pandas as pd
+from backend.SearchEpc import SearchEpc
+from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
+from tqdm import tqdm
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def get_data(
+    df,
+    manual_uprn_map,
+    epc_auth_token,
+    uprn_column,
+    fulladdress_column,
+    address1_column,
+    postcode_column,
+    property_type_column,
+    built_form_column,
+    epc_api_only=False,
+    row_id_name="row_id",
+):
+    """
+    Look up the newest EPC for every row of an asset list.
+
+    Each row is searched first with its address (and UPRN when one is known), then
+    retried with a fallback first address line, and finally - when the row has a
+    mappable property type - searched again with the property type and built form
+    set so that an EPC can be estimated. Unless ``epc_api_only`` is set, the result
+    is then enriched with data retrieved via ``RetrieveFindMyEpc``.
+
+    A failure on one row never aborts the run: the row id is recorded in the
+    returned ``errors`` list, the error is logged, and processing continues.
+
+    :param df: DataFrame of properties, one row per home.
+    :param manual_uprn_map: Dictionary {full_address: uprn} that overrides the UPRN column.
+    :param epc_auth_token: Auth token passed to SearchEpc.
+    :param uprn_column: Name of the column holding the UPRN.
+    :param fulladdress_column: Name of the column holding the full address.
+    :param address1_column: Name of the column holding the first address line / house number.
+    :param postcode_column: Name of the column holding the postcode.
+    :param property_type_column: Name of the column holding the standardised property type.
+    :param built_form_column: Name of the column holding the standardised built form.
+    :param epc_api_only: If True, skip the FindMyEPC lookup.
+    :param row_id_name: Name of the column that identifies a row; also used as the key in each result.
+    :return: Tuple (epc_data, errors, no_epc): the list of EPC dictionaries, the row ids
+        that raised an error, and the row ids for which no EPC was found.
+    """
+    # These re-map the standard property types to forms accepted by the EPC api, so we can predict EPCs
+    property_type_map = {
+        "house": "House",
+        "flat": "Flat",
+        "maisonette": "Maisonette",
+        "bungalow": "Bungalow",
+        "block house": "House",
+        "coach house": "House",
+        "bedsit": "Flat"
+    }
+
+    built_form_map = {
+        "mid-terrace": "Mid-Terrace",
+        "end-terrace": "End-Terrace",
+        "semi-detached": "Semi-Detached",
+        "detached": "Detached"
+    }
+
+    epc_data = []
+    errors = []
+    no_epc = []
+    for _, home in tqdm(df.iterrows(), total=len(df)):
+        try:
+
+            # If we have a block of flats, we cannot retrieve this data
+            if home.get(property_type_column) == "block of flats":
+                no_epc.append(home[row_id_name])
+                continue
+
+            postcode = home[postcode_column]
+            house_number = str(home[address1_column]).strip()
+            full_address = home[fulladdress_column].strip()
+            house_no = SearchEpc.get_house_number(address=str(house_number), postcode=postcode)
+            if house_no is None:
+                house_no = house_number
+
+            # A manually supplied UPRN takes precedence over the one in the data
+            uprn = manual_uprn_map.get(full_address, None)
+            if uprn is None and home.get(uprn_column):
+                uprn = home[uprn_column]
+
+            if pd.isnull(uprn):
+                uprn = None
+
+            property_type = property_type_map.get(home.get(property_type_column), None)
+            built_form = built_form_map.get(home.get(built_form_column))
+
+            searcher = SearchEpc(
+                address1=str(house_no),
+                postcode=postcode,
+                auth_token=epc_auth_token,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address,
+                max_retries=5,
+                uprn=uprn
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+
+            # Nothing found and no UPRN to go on: retry with a fallback first address line
+            if searcher.newest_epc is None and uprn is None:
+                # Try again:
+                if SearchEpc.get_house_number(address=str(house_number), postcode=postcode) is None:
+                    # Backup
+                    add1 = full_address.split(",")
+                    if len(add1) > 1:
+                        add1 = add1[1].strip()
+                    else:
+                        # Try splitting on space
+                        add1 = full_address.split(" ")[0].strip()
+
+                else:
+                    add1 = str(house_number)
+                searcher = SearchEpc(
+                    address1=add1,
+                    postcode=postcode,
+                    auth_token=epc_auth_token,
+                    os_api_key="",
+                    property_type=None,
+                    fast=True,
+                    full_address=full_address,
+                    max_retries=5
+                )
+
+                # Check if we have a flat or apartment
+                if (
+                    "flat" in house_number.lower() or "apartment" in house_number.lower() or "apt" in
+                    house_number.lower()
+                ):
+                    searcher.ordnance_survey_client.property_type = "Flat"
+
+                searcher.find_property(skip_os=True)
+
+            # As a final resort, we estimate the EPC
+            if property_type is not None and searcher.newest_epc is None:
+                searcher.ordnance_survey_client.property_type = property_type
+                searcher.ordnance_survey_client.built_form = built_form
+                searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is None:
+                no_epc.append(home[row_id_name])
+                continue
+
+            # Look for EPC recommendations. This is best-effort: a failure here must not
+            # lose the EPC itself, so fall back to an empty recommendation list.
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:
+                property_recommendations = {"rows": []}
+
+            if epc_api_only:
+                epc = {
+                    row_id_name: home[row_id_name],
+                    **searcher.newest_epc.copy(),
+                    "recommendations": property_recommendations["rows"]
+                }
+
+                epc_data.append(epc)
+                continue
+
+            # Retrieve data from FindMyEPC
+            try:
+                find_epc_searcher = RetrieveFindMyEpc(
+                    address=searcher.newest_epc["address"], postcode=searcher.newest_epc["postcode"]
+                )
+                find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+            except ValueError as e:
+                if "No EPC found" in str(e) and "address1" in searcher.newest_epc:
+                    # Retry using only the first address line
+                    try:
+                        find_epc_searcher = RetrieveFindMyEpc(
+                            address=searcher.newest_epc["address1"], postcode=searcher.newest_epc["postcode"]
+                        )
+                        find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+                    except ValueError as retry_error:
+                        if "No EPC found" in str(retry_error):
+                            find_epc_data = {}
+                        else:
+                            logger.error(f"Error retrieving FindMyEPC data: {retry_error}")
+                            raise Exception(f"Error retrieving FindMyEPC data: {retry_error}") from retry_error
+                else:
+                    find_epc_data = {}
+            except Exception as e:
+                raise Exception(f"Error retrieving FindMyEPC data: {e}") from e
+            # Random pause between FindMyEPC lookups
+            time.sleep(np.random.uniform(0.1, 1))
+
+            epc = {
+                row_id_name: home[row_id_name],
+                **searcher.newest_epc.copy(),
+                "recommendations": property_recommendations["rows"],
+                "find_my_epc_data": find_epc_data,
+            }
+
+            epc_data.append(epc)
+        except Exception as e:
+            # Record the failing row and carry on, but log the cause so it can be diagnosed
+            failed_row_id = home[row_id_name]
+            logger.error(f"Error retrieving EPC data for row {failed_row_id}: {e}")
+            errors.append(failed_row_id)
+            time.sleep(5)
+
+    return epc_data, errors, no_epc
--- a/backend/Funding.py
+++ b/backend/Funding.py
@ -0,0 +1,413 @@
+import pandas as pd
+import numpy as np
+from typing import List
+
+from backend.app.plan.schemas import HousingType
+
+
+class Funding:
+    """
+    Given a property, this class identifies if the home is possibly eligible for funding under
+    the various funding schemes. It also calculates the expected amount of funding available
+    and flags any tenant specific requirements that need to be considered for the funding to be attained.
+
+    After ``check_eligibiltiy`` has run, the per-scheme results are available on the instance as
+    ``gbis_eligibiltiy``, ``eco4_eligibility`` and ``whlg_eligibility``. The misspelt attribute and
+    method names are deliberately kept, because they are read from outside this class.
+    """
+
+    SCHEMES = ["eco4", "gbis", "whlg"]
+
+    # SAP half-band thresholds, listed from the highest band to the lowest.
+    # NOTE: the constant name is misspelt ("THREHOLDS") but is kept so existing references keep working.
+    ECO_SAP_SCORE_THREHOLDS = [
+        {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0},
+        {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0},
+        {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5},
+        {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5},
+        {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25},
+        {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75},
+        {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75},
+        {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25},
+        {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25},
+        {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75},
+        {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75},
+        {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25},
+        {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25},
+        {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75}
+    ]
+
+    # EPC ratings that can qualify for GBIS
+    GBIS_ELIGIBLE_EPC_RATINGS = ["D", "E", "F", "G"]
+
+    # Insulation measures that are fundable under every GBIS route
+    GBIS_INSULATION_MEASURES = [
+        "internal_wall_insulation",
+        "external_wall_insulation",
+        "flat_roof_insulation",
+        "suspended_floor_insulation",
+        "room_roof_insulation",
+    ]
+
+    # Measures that are not available for every eligibility type (excluded from general eligibility)
+    GBIS_RESTRICTED_MEASURES = [
+        "cavity_wall_insulation",
+        "loft_insulation",
+    ]
+
+    def __init__(
+        self,
+        tenure: HousingType,
+        starting_epc,
+        starting_sap,
+        postcode,
+        floor_area,
+        council_tax_band,
+        property_recommendations,
+        project_scores_matrix,
+        whlg_eligible_postcodes,
+        gbis_abs_rate: int,
+        eco4_abs_rate: int,
+    ):
+        """
+        Store the property details and pre-filter the lookup tables to the rows relevant to this property.
+        No validation of the parameter types is performed here.
+
+        :param tenure: Indicates if the property is a social or private home
+        :param starting_epc: The current EPC rating of the property
+        :param starting_sap: The current SAP score for the property
+        :param postcode: The postcode of the property, matched (lower-cased) against the WHLG postcodes
+        :param floor_area: The total floor area of the property
+        :param council_tax_band: The council tax band of the property
+        :param property_recommendations: The recommendations for the property
+        :param project_scores_matrix: The matrix of project scores for ECO4
+        :param whlg_eligible_postcodes: The postcodes eligible for WHLG
+        :param gbis_abs_rate: The assumed £/abs achieved by the installer for GBIS
+        :param eco4_abs_rate: The assumed £/abs achieved by the installer for ECO4
+        """
+
+        # TODO: Things we need to include:
+        #       1) Amount of funding
+        #       2) Fundable measures, as a subset of measures may be fundable, not all
+
+        self.tenure = tenure
+        self.starting_epc = starting_epc
+        self.starting_sap = starting_sap
+        self.postcode = postcode
+        self.starting_eco_band = self.sap_to_eco_band(self.starting_sap)
+        self.floor_area_segment = self.classify_floor_area(floor_area)
+        self.gbis_abs_rate = gbis_abs_rate
+        self.eco4_abs_rate = eco4_abs_rate
+        self.council_tax_band = council_tax_band
+
+        self.recommendations = property_recommendations
+
+        # Unique measure types across the default recommendations only
+        self.measure_types = list({r["measure_type"] for r in property_recommendations if r["default"]})
+
+        # Load in the eco4 project scores matrix
+        # Filter the matrix on scores relevant to this property
+        self.project_scores_matrix = project_scores_matrix[
+            (project_scores_matrix["Floor Area Segment"] == self.floor_area_segment) &
+            (project_scores_matrix["Starting Band"] == self.starting_eco_band)
+            ]
+
+        # The postcode column is already lower case
+        self.whlg_eligible_postcodes = whlg_eligible_postcodes[
+            whlg_eligible_postcodes["Postcode"] == self.postcode.lower()
+            ]
+
+        # Store the final outputs
+        self.gbis_eligibiltiy = {}
+        self.eco4_eligibility = {}
+        self.whlg_eligibility = {}
+
+    def output(
+        self,
+        scheme: str,
+        eligible: bool,
+        types: List[str],
+        measure_types: List[str],
+        project_score: float,
+        estimated_funding: float,
+        notify_tenant_benefits_requirements: bool,
+        notify_council_tax_band_requirements: bool,
+        notify_tenant_low_income_requirements: bool,
+        innovation_required: bool,
+    ):
+        """
+        Build the standard eligibility record for a single funding option.
+
+        :param scheme: One of SCHEMES
+        :param eligible: Whether the property is eligible under this option
+        :param types: The recommendation types covered by this option
+        :param measure_types: The measure types covered by this option
+        :param project_score: The project score used to estimate the funding
+        :param estimated_funding: The estimated funding amount
+        :param notify_tenant_benefits_requirements: True if the tenant must be checked for benefits
+        :param notify_council_tax_band_requirements: True if the council tax band must be checked
+        :param notify_tenant_low_income_requirements: True if the tenant must be checked for low income
+        :param innovation_required: True if an innovation measure is required
+        :return: A dictionary describing the funding option
+        :raises ValueError: If the scheme is not one of SCHEMES
+        """
+
+        if scheme not in self.SCHEMES:
+            raise ValueError("Scheme not recognised")
+
+        return {
+            "scheme": scheme,
+            "eligible": eligible,
+            "type": types,
+            "measure_types": measure_types,
+            "project_score": project_score,
+            "estimated_funding": estimated_funding,
+            "requires_benefits": notify_tenant_benefits_requirements,
+            "requires_council_tax_band": notify_council_tax_band_requirements,
+            "requires_low_income": notify_tenant_low_income_requirements,
+            "innovation_required": innovation_required,
+        }
+
+    @staticmethod
+    def classify_floor_area(floor_area):
+        """
+        Bucket a floor area into the segment label that is compared against the "Floor Area Segment"
+        column of the project scores matrix.
+        NOTE(review): the top segment is labelled "200" here - confirm this matches the matrix labels.
+
+        :param floor_area: The total floor area of the property
+        :return: One of "0-72", "73-97", "98-199" or "200"
+        """
+        if floor_area <= 72:
+            return "0-72"
+
+        if floor_area <= 97:
+            return "73-97"
+
+        if floor_area <= 199:
+            return "98-199"
+
+        return "200"
+
+    def find_gbis_measures(self, measures):
+        """
+        Find the default recommendations, among the given measures, that could attract GBIS funding.
+        The best measure is one that:
+        1) Creates some SAP movement, therefore enables eligibility
+        2) Generates the most funding
+        3) Has a reasonable ROI
+
+        :param measures: Recommendation types / measure types that are fundable for the route being checked
+        :return: A list of dictionaries with the keys "type", "measure_type", "project_score" and
+                 "estimated_funding", sorted by the net cost per SAP point (cheapest first). The list is
+                 empty when no default recommendation matches or none changes the SAP half-band
+        """
+        # Only default recommendations are considered. The brackets matter: without them "and" binds
+        # tighter than "or" and non-default recommendations would be let through on a type match
+        candidates = [
+            m for m in self.recommendations
+            if m["default"] and ((m["type"] in measures) or (m["measure_type"] in measures))
+        ]
+        if not candidates:
+            return []
+
+        measure_table = pd.DataFrame(candidates)
+
+        measure_table["post_install_sap"] = measure_table["sap_points"] + self.starting_sap
+        # We classify the movement
+        measure_table["Finishing Band"] = np.floor(measure_table["post_install_sap"]).apply(self.sap_to_eco_band)
+        # Remove any measures that generate zero SAP movement
+        measure_table = measure_table[measure_table["Finishing Band"] != self.starting_eco_band]
+
+        if measure_table.empty:
+            # Nothing moves the property into a different half-band, so nothing is fundable
+            return []
+
+        # We merge on the project matrix, on post install band
+        measure_table = measure_table.merge(
+            self.project_scores_matrix, how="left", on="Finishing Band"
+        )
+        # Cost Savings is the abs
+        measure_table["estimated_funding"] = measure_table["Cost Savings"] * self.gbis_abs_rate
+        # We cap any estimated funding at the install cost
+        measure_table["estimated_funding"] = np.where(
+            measure_table["estimated_funding"] >= measure_table["total"],
+            measure_table["total"],
+            measure_table["estimated_funding"]
+        )
+
+        # Sort by the measure that will cost the client the least, per sap point
+        measure_table["cost_minus_funding"] = measure_table["total"] - measure_table["estimated_funding"]
+        measure_table["cost_minus_funding_per_sap"] = measure_table["cost_minus_funding"] / measure_table["sap_points"]
+        measure_table = measure_table.sort_values(["cost_minus_funding_per_sap", "total"], ascending=[True, False])
+
+        return measure_table[
+            ["type", "measure_type", "Cost Savings", "estimated_funding"]
+        ].rename(columns={"Cost Savings": "project_score"}).to_dict("records")
+
+    def sap_to_eco_band(self, sap_points):
+        """
+        Given a sap point score, this function will classify the points into the SAP half-band.
+
+        A score belongs to the highest band whose "From" value it reaches. This keeps the result
+        unambiguous on shared boundaries (e.g. 86 is High_B, 96 is High_A) and assigns scores that sit
+        between two listed ranges (e.g. 91.5) to the band below.
+
+        :param sap_points: The SAP score to classify
+        :return: The name of the half-band, e.g. "Low_D"
+        :raises ValueError: If the score is below the lowest band or is not a number
+        """
+
+        if sap_points > 100:
+            return "High_A"
+
+        # Walk the bands from the highest to the lowest and take the first one the score reaches
+        for band in sorted(self.ECO_SAP_SCORE_THREHOLDS, key=lambda b: b["From"], reverse=True):
+            if sap_points >= band["From"]:
+                return band["Band"]
+
+        raise ValueError(f"Unable to classify SAP points {sap_points!r} into a half band")
+
+    def _gbis_outputs(
+        self,
+        recommended_measures,
+        requires_benefits,
+        requires_council_tax_band,
+        requires_low_income,
+        innovation_required,
+    ):
+        """
+        Build one GBIS output record per recommended measure, all sharing the same notification flags.
+        """
+        return [
+            self.output(
+                scheme="gbis",
+                eligible=True,
+                types=[m["type"]],  # This is single measure so we only have one type
+                measure_types=[m["measure_type"]],
+                project_score=m["project_score"],
+                estimated_funding=m["estimated_funding"],
+                notify_tenant_benefits_requirements=requires_benefits,
+                notify_council_tax_band_requirements=requires_council_tax_band,
+                notify_tenant_low_income_requirements=requires_low_income,
+                innovation_required=innovation_required
+            ) for m in recommended_measures
+        ]
+
+    def gbis_prs(self):
+        """
+        Checks if a private rental is eligible for GBIS. There are the following possible options
+        1) General Eligibility, contingent on EPC D-G and council tax band A-D. Excludes CWI, LI and heating
+           controls
+        2) Low income group - contingent on EPC D-G and tenant must receive benefits. Excludes heating controls
+        3) GBIS Flex route 1, 3 - Great British Insulation Scheme Routes 1 and 3 are for pre-installation
+        SAP bands D-G for owner-occupied households, D-E for private rented sector households
+        (Including F & G if exempt from MEES). If household is low income. Excludes heating controls
+        4) GBIS Flex route 2 - EPC E - G and low income household. Excludes heating controls
+
+        Eligible measures:
+        • Solid wall
+        • pitched roof
+        • flat roof
+        • under floor
+        • solid floor park home and
+        • room in-roof insulation
+
+        :return: A list of output records, one per fundable measure. Empty if there is no funding available
+        """
+
+        # Every route requires an EPC rating of D-G
+        if self.starting_epc not in self.GBIS_ELIGIBLE_EPC_RATINGS:
+            return []
+
+        general_measures = self.GBIS_INSULATION_MEASURES
+        all_measures = self.GBIS_INSULATION_MEASURES + self.GBIS_RESTRICTED_MEASURES
+
+        # General Eligibility
+        if (
+            any(measure in general_measures for measure in self.measure_types) and
+            (self.council_tax_band in [None, "A", "B", "C", "D"])
+        ):
+            # If the council tax band is missing, we notify the customer that this is a requirement that
+            # should be checked
+            return self._gbis_outputs(
+                self.find_gbis_measures(measures=general_measures),
+                requires_benefits=False,
+                requires_council_tax_band=self.council_tax_band is None,
+                requires_low_income=False,
+                innovation_required=False,
+            )
+
+        # Low income/flex
+        if any(measure in all_measures for measure in self.measure_types):
+            # Can also include CWI/LI but requires the tenant to be low income or on benefits
+            return self._gbis_outputs(
+                self.find_gbis_measures(measures=all_measures),
+                requires_benefits=True,
+                requires_council_tax_band=False,
+                requires_low_income=True,
+                innovation_required=False,
+            )
+
+        # Otherwise, no funding availability
+        return []
+
+    def gbis_social(self):
+        """
+        Because this is social housing, we have two typical means for eligibility
+        1) EPC D, where an innovation measure is required
+        2) EPC G-E, where an innovation measure isn't required
+
+        :return: A list of output records, one per fundable measure. Empty if there is no funding available
+        """
+        # The rating is the EPC letter, not the numeric SAP score
+        if self.starting_epc not in self.GBIS_ELIGIBLE_EPC_RATINGS:
+            return []
+
+        # All measures are available, including heating controls
+        valid_measures = self.GBIS_INSULATION_MEASURES + self.GBIS_RESTRICTED_MEASURES + ["heating_control"]
+
+        return self._gbis_outputs(
+            self.find_gbis_measures(measures=valid_measures),
+            requires_benefits=False,
+            requires_council_tax_band=False,
+            requires_low_income=False,
+            innovation_required=self.starting_epc == "D",
+        )
+
+    def gbis(self):
+        """
+        Check if a property is eligible for GBIS and store the result in gbis_eligibiltiy.
+        NOTE(review): assumes tenure compares equal to the strings "Private" / "Social" - confirm
+        against HousingType.
+
+        :raises NotImplementedError: For any tenure other than private or social
+        """
+
+        if self.tenure == "Private":
+            self.gbis_eligibiltiy = self.gbis_prs()
+            return
+
+        if self.tenure == "Social":
+            self.gbis_eligibiltiy = self.gbis_social()
+            return
+
+        raise NotImplementedError("GBIS eligibility is only implemented for Private and Social tenures")
+
+    def whlg(self):
+        """
+        Check if a property is eligible for WHLG and store the result in whlg_eligibility.
+
+        :raises NotImplementedError: If the property is not social housing and its postcode is in the
+                                     WHLG eligible postcodes, as this route has not been built yet
+        """
+        if self.tenure == "Social":
+            # We can't do anything for social housing
+            self.whlg_eligibility = []
+            return
+
+        if not self.whlg_eligible_postcodes.empty:
+            # TODO: build the eligibility records with self.output(scheme="whlg", ...)
+            raise NotImplementedError("Implement me")
+
+    def eco4_prs(self):
+        """
+        Checks if a private rental is eligible for ECO4. Not implemented yet.
+
+        :raises NotImplementedError: Always
+        """
+        raise NotImplementedError("ECO4 eligibility for private rentals is not implemented")
+
+    def eco4(self):
+        """
+        Checks if a property is eligible for ECO4 and stores the result in eco4_eligibility.
+        Only private rentals are routed anywhere at the moment; other tenures are left untouched.
+        """
+        if self.tenure == "Private":
+            self.eco4_eligibility = self.eco4_prs()
+            return
+
+    def check_eligibiltiy(self):
+        """
+        This function instigates the checking process. ECO4 is currently switched off.
+        """
+
+        self.gbis()
+        # self.eco4()
+        self.whlg()
--- a/backend/Property.py
+++ b/backend/Property.py
@ -22,6 +22,7 @@ from recommendations.recommendation_utils import (
 )
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 from backend.app.utils import sap_to_epc
+from backend.Funding import Funding
 import backend.app.assumptions as assumptions

 ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
@ -69,6 +70,10 @@ class Property:
    # Contains the solar panel optimisation results from the Google Solar API
    solar_panel_configuration = None

+    # If true, indicates the floor area has actually been given to us by the owner, and we should use this figure
+    # instead of the one in the EPC, when we simulate
+    owner_floor_area = False
+
    def __init__(
        self,
        id,
@ -103,7 +108,7 @@ class Property:

        self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
        self.non_invasive_recommendations = (
-            ast.literal_eval(non_invasive_recommendations['recommendations']) if
+            non_invasive_recommendations['recommendations'] if
            non_invasive_recommendations else []
        )
        # This is a list of measures that have been recommended for the property
@ -132,9 +137,14 @@ class Property:
        self.energy_cost_estimates = {}
        self.energy_consumption_estimates = {}

+        # When storing the energy, we also reserve fields for the appliance and total CO2 emissions (filled in later)
        self.energy = {
            "primary_energy_consumption": epc_record.get("energy_consumption_current"),
-            "co2_emissions": epc_record.get("co2_emissions_current"),
+            "epc_co2_emissions": epc_record.get("co2_emissions_current"),
+            # These will be added in once we estimate the amount of emissions from appliances - using the carbon
+            # intensity of electricity
+            "appliances_co2_emissions": None,
+            "co2_emissions": None
        }
        self.ventilation = {
            "ventilation": epc_record.get("mechanical_ventilation"),
@ -202,6 +212,11 @@ class Property:
        # TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data
        self.parse_kwargs(kwargs)

+        # Funding
+        self.gbis_eligibiltiy = None
+        self.eco4_eligibility = None
+        self.whlg_eligibility = None
+
    @classmethod
    def extract_kwargs(cls, kwargs):
        """
@ -215,25 +230,24 @@ class Property:
        # as we collect more data from the energy assessment

        n_bathrooms = kwargs.get("n_bathrooms", None)
-        if n_bathrooms not in [None, ""]:
-            # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
-            n_bathrooms = int(round(float(n_bathrooms) + 1e-5))
+        # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
+        n_bathrooms = int(round(float(n_bathrooms) + 1e-5)) if n_bathrooms not in [None, ""] else None

        n_bedrooms = kwargs.get("n_bedrooms", None)
-        if n_bedrooms not in [None, ""]:
-            n_bedrooms = int(round(float(n_bedrooms) + 1e-5))
+        n_bedrooms = int(round(float(n_bedrooms) + 1e-5)) if n_bedrooms not in [None, ""] else None

        number_of_floors = kwargs.get("number_of_floors", None)
-        if number_of_floors not in [None, ""]:
-            number_of_floors = int(round(float(number_of_floors) + 1e-5))
+        number_of_floors = int(round(float(number_of_floors) + 1e-5)) if number_of_floors not in [None, ""] else None

        insulation_floor_area = kwargs.get("insulation_floor_area", None)
-        if insulation_floor_area not in [None, ""]:
-            insulation_floor_area = float(insulation_floor_area)
+        insulation_floor_area = float(insulation_floor_area) if insulation_floor_area not in [None, ""] else None

        insulation_wall_area = kwargs.get("insulation_wall_area", None)
-        if insulation_wall_area not in [None, ""]:
-            insulation_wall_area = float(insulation_wall_area)
+        insulation_wall_area = float(insulation_wall_area) if insulation_wall_area not in [None, ""] else None
+
+        # We allow for the asset owner to provide us with total floor area, in the event of it being incorrect
+        floor_area = kwargs.get("floor_area", None)
+        floor_area = float(floor_area) if floor_area not in [None, ""] else None

        return {
            "n_bathrooms": n_bathrooms,
@ -242,12 +256,15 @@ class Property:
            "insulation_floor_area": insulation_floor_area,
            "insulation_wall_area": insulation_wall_area,
            "building_id": kwargs.get("building_id", None),
+            "floor_area": floor_area
        }

    def parse_kwargs(self, kwargs):
        # We extract the elements from kwargs that we recognise. Anything additional is ignored
        for arg, val in kwargs.items():
            if val is not None:
+                if arg == "floor_area":
+                    self.owner_floor_area = True
                setattr(self, arg, val)

    def create_base_difference_epc_record(self, cleaned_lookup: dict):
@ -257,14 +274,7 @@ class Property:
        It will be the same starting and ending EPC, as we don't have the expected EPC yet
        """

-        # difference_record = self.epc_record - self.epc_record
-
-        # TODO: change these lower and replace in the settings file
-        # print(
-        #     "CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING"
-        # )
        fixed_data_col_names = MANDATORY_FIXED_FEATURES + LATEST_FIELD
-        # print("NEED TO CHANGE THE DASH TO LOWER CASE")
        fixed_data_col_names = [
            x.lower().replace("_", "-") for x in fixed_data_col_names
        ]
@ -275,8 +285,6 @@ class Property:
            if k in fixed_data_col_names
        }

-        # difference_record.append_fixed_data(fixed_data)
-
        difference_record = self.epc_record.create_EPCDifferenceRecord(
            self.epc_record, fixed_data
        )
@ -285,10 +293,11 @@ class Property:
            datasets=[difference_record], cleaned_lookup=cleaned_lookup
        )

-        # TODO: adjust the base difference record with the previously calculated u values + features
-        # estimated_perimeter is different to the perimeter in the epc record
-
-        # self.base_difference_record.df
+        # If we have variables that have been given to us by the landlord that we know are correct, whereas the EPC
+        # may not be, we use them
+        if self.owner_floor_area is not None:
+            self.base_difference_record.df["total_floor_area_ending"] = self.floor_area
+            self.base_difference_record.df["estimated_perimeter_ending"] = self.perimeter

    def simulate_all_representative_recommendations(
        self, property_representative_recommendations,
@ -374,7 +383,7 @@ class Property:
            for rec in property_recommendations_by_phase:
                # We simulate the impact of the recommendation at this current phase, and all of the prior phases

-                if rec["type"] in ["mechanical_ventilation", "trickle_vents", "draught_proofing"]:
+                if rec["type"] in ["trickle_vents", "draught_proofing"]:
                    continue

                scoring_dict = self.create_recommendation_scoring_data(
@ -382,8 +391,8 @@ class Property:
                    recommendation_record=recommendation_record,
                    recommendations=previous_phase_representatives + [rec],
                    primary_recommendation_id=rec["recommendation_id"],
-                    non_invasive_recommendations=self.non_invasive_recommendations,
                )
+
                self.recommendations_scoring_data.append(scoring_dict)

                simulation_epc = self.epc_record.prepared_epc.copy()
@ -426,6 +435,18 @@ class Property:
                            if phase_epc_transformation[k] == v:
                                continue

+                            if k == "hotwater-description":
+                                if (
+                                    v == "From main system"
+                                ) and (
+                                    phase_epc_transformation["mainheat-description"] == "Electric storage heaters"
+                                ) and (
+                                    "Electric immersion" in phase_epc_transformation["hotwater-description"]
+                                ):
+                                    # It means we've recommended HHR with electric immersion, and shouldn't overwrite
+                                    # the hot water description
+                                    continue
+
                            raise NotImplementedError(
                                "Already have this key in the phase_epc_transformation - implement me"
                            )
@ -441,7 +462,7 @@ class Property:
        if self.simulation_epcs is None:
            raise ValueError("Simulation EPCs have not been created")

-        rec_ids = sorted(list(self.simulation_epcs.keys()))
+        rec_ids = list(self.simulation_epcs.keys())
        updated_simulation_epcs = []
        for rec_id in rec_ids:
            sim_epc = self.simulation_epcs[rec_id].copy()
@ -467,15 +488,12 @@ class Property:
        # Now we havet this data inthe
        self.updated_simulation_epcs = updated_simulation_epcs

-        return updated_simulation_epcs
-
    @staticmethod
    def create_recommendation_scoring_data(
        property_id,
        recommendation_record,
        recommendations: list,
        primary_recommendation_id: int,
-        non_invasive_recommendations: list = None,
    ):
        """
        This function will iterate through a list of recommendations and apply a simulation for each recommendation
@ -484,7 +502,6 @@ class Property:
        :param recommendation_record: The record of the property, which will be updated
        :param recommendations: The list of recommendations to apply
        :param primary_recommendation_id: The id of the primary recommendation, which is used to identify the record
-        :param non_invasive_recommendations: The list of non-invasive recommendations
        :return: The updated recommendation record
        """

@ -513,7 +530,7 @@ class Property:
                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
                "cylinder_thermostat", "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
                "solid_floor_insulation", "suspended_floor_insulation", "mixed_glazing",
-                "windows_glazing"
+                "windows_glazing", "mechanical_ventilation"
            ]:
                # We update the data, as defined in the recommendaton
                for prefix in ["walls", "roof", "floor"]:
@ -539,7 +556,7 @@ class Property:
                "solid_floor_insulation", "suspended_floor_insulation",
                "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
                "heating_control", "secondary_heating", "cylinder_thermostat", "mixed_glazing",
-                "extension_cavity_wall_insulation",
+                "extension_cavity_wall_insulation", "mechanical_ventilation",
            ]:
                raise NotImplementedError(
                    "Implement me, given type %s" % recommendation["type"]
@ -707,6 +724,15 @@ class Property:
            "unadjusted": unadjusted_kwh_estimates
        }

+        # Update carbon with appliances
+        self.energy["appliances_co2_emissions"] = (
+            (unadjusted_kwh_estimates["appliances"] * assumptions.ELECTRICITY_CARBON_INTENSITY) / 1000
+        )
+        # Re-calculate total CO2 emissions
+        self.energy["co2_emissions"] = float(np.round(
+            self.energy["epc_co2_emissions"] + self.energy["appliances_co2_emissions"], 2
+        ))
+
    def set_spatial(self, spatial: pd.DataFrame):
        """
        Sets whether the property is in a conservation area given the output of the ConservationAreaClient
@ -1226,6 +1252,15 @@ class Property:
        if (self.building_id is not None) and (self.solar_panel_configuration is not None):
            return True

+        # If the property is listed or is a heritage building, solar panels are generally difficult to get
+        # through planning restrictions, so we do not recommend solar panels
+        if self.is_listed or self.is_heritage:
+            # If the property is in a conservation area, we can still recommend solar panels
+            # but they need to be done in a way that is sympathetic to the building. E.g. the panels
+            # may be installed such that they are not visible from the street
+            return False
+
        is_valid_property_type = self.data["property-type"] in ["House", "Bungalow", "Maisonette"]
        is_valid_roof_type = (
            self.roof["is_flat"] or self.roof["is_pitched"] or self.roof["is_roof_room"]
@ -1294,3 +1329,11 @@ class Property:
        )

        return electric_consumption
+
+    def insert_funding(self, funding_calulator: Funding):
+        """
+        Copy the per-scheme eligibility results from the funding calculator onto this property.
+
+        :param funding_calulator: The Funding instance whose gbis_eligibiltiy, eco4_eligibility and
+                                  whlg_eligibility attributes are read
+        """
+        for scheme_attribute in ("gbis_eligibiltiy", "eco4_eligibility", "whlg_eligibility"):
+            setattr(self, scheme_attribute, getattr(funding_calulator, scheme_attribute))
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@ -2,6 +2,7 @@ import os
 import time
 import re

+from urllib.parse import urlencode
 import usaddress
 import pandas as pd
 import numpy as np
@ -95,7 +96,7 @@ vartypes = {
    'walls-env-eff': 'str',
    'transaction-type': 'str',
    # 'uprn': "Int64",
-    'current-energy-efficiency': 'float',
+    'current-energy-efficiency': 'Int64',
    'energy-consumption-current': 'float',
    'mainheat-description': 'str',
    'lighting-cost-current': 'float',
@ -138,8 +139,8 @@ class SearchEpc:
    }

    NODATA = {
-        "status": 201,
-        "message": "No data",
+        "status": 204,
+        "message": "no data",
        "error": None
    }

@ -154,7 +155,7 @@ class SearchEpc:
        uprn: [int, None] = None,
        size=None,
        property_type=None,
-        fast=False
+        fast=False,
    ):
        """
        Address lines 1 and postcode are mandatory fields. The other address lines are optional
@ -206,10 +207,15 @@ class SearchEpc:

        try:
            # Updated regex to catch house numbers including alphanumeric ones
-            pattern = r'(?i)(?:flat|apartment)\s*(\d+\w*)|^\s*(\d+\w*)'
-            match = re.search(pattern, address)
-            if match:
-                return next(g for g in match.groups() if g is not None)
+            pattern = r'(?i)(?:flat|apartment|room)\s*(\d+\w*)|^\s*(\d+\w*)'
+            match1 = re.search(pattern, address)
+            if match1:
+                return next(g for g in match1.groups() if g is not None)
+
+            pattern2 = r'(?i)(flat|apartment|room)\s*([a-zA-Z]?\d+[a-zA-Z]?)'
+            match2 = re.search(pattern2, address)
+            if match2:
+                return match2.group(2)

            parsed = usaddress.parse(address)
            # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
@ -220,7 +226,8 @@ class SearchEpc:
                            continue
                        if part == postcode.split(" ")[1]:
                            continue
-                    return part  # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
+                    return part.rstrip(",")
+                    # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
                    # number

            # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found
@ -247,46 +254,36 @@ class SearchEpc:
        else:
            return None

-    def get_epc(self, params=None, size=None):
-        # Get the EPC data with retries
-        size = size if size is not None else self.size
-        if params is None:
-            if self.uprn:
-                params = {"uprn": self.uprn}
-            else:
-                params = {"address": self.address1, "postcode": self.postcode}
+    def _get_epc(self, params, size):
+        """
+        To be called by get_epc() - not for external usage
+        """
+
+        url = os.path.join(self.client.domestic.host, "search")
+        if size:
+            url += "?" + urlencode({k: v for k, v in {"size": size}.items() if v})

        for retry in range(self.max_retries):
            try:

-                if "uprn" in params:
-                    # We use the direct call method inside, since we need to implement uprn as a valid
-                    # parameter for the search function
-                    url = os.path.join(self.client.domestic.host, "search")
-                    response = self.client.domestic.call(method="get", url=url, params=params)
-                else:
-                    response = self.client.domestic.search(params=params, size=size)
+                response = self.client.domestic.call(method="get", url=url, params=params)

                if response:
                    self.data = response
-                    return self.SUCCESS
+                    return {
+                        "response": response,
+                        "msg": self.SUCCESS
+                    }

                if retry > 0:
                    logger.info("Failed previous attempt but retry successful")
                # If we got nothing, final try
                if not response:
                    return {
-                        "status": 204,
-                        "message": "no data",
-                        "error": None
+                        "response": response,
+                        "msg": self.NODATA
                    }

-                return {
-                    "status": 200,
-                    "message": "success",
-                    "error": None
-                }
-
            except Exception as e:
                if retry < self.max_retries - 1:
                    # If not the last retry, wait for 3 seconds before retrying
@ -294,11 +291,66 @@ class SearchEpc:
                else:
                    # If it's the last retry, we continue
                    return {
-                        "status": 500,
-                        "message": "Could not retrieve EPC data",
-                        "error": str(e)
+                        "response": {},
+                        "msg": {
+                            "status": 500,
+                            "message": "Could not retrieve EPC data",
+                            "error": str(e)
+                        }
                    }

+    def get_epc(self, params=None, size=None):
+        """
+        Fetch EPC data (with retries, via _get_epc) and return a status message.
+
+        If params are given, a single search is made with them. Otherwise we search by UPRN and by
+        address/postcode (whichever are set), merge the rows and de-dupe them on lmk-key. The merged
+        rows are stored on self.data.
+
+        :param params: Optional dict of search parameters, passed straight to the search endpoint.
+        :param size: Optional page size, defaults to self.size.
+        :return: Message dict with "status", "message" and "error" keys.
+        :raises ValueError: If no params are given and none of uprn, address1 or postcode is set.
+        """
+        size = size if size is not None else self.size
+        if params:
+            output = self._get_epc(params=params, size=size)
+            if output["msg"]["status"] == 200:
+                self.data = output["response"]
+            return output["msg"]
+
+        if not self.uprn and not self.address1 and not self.postcode:
+            raise ValueError("No search parameters provided")
+
+        uprn_params = {"uprn": self.uprn} if self.uprn else {}
+        address_params = {}
+        if self.address1:
+            address_params["address"] = self.address1
+        if self.postcode:
+            address_params["postcode"] = self.postcode
+
+        # We attempt the search with uprn params
+        data = {"rows": []}
+        api_response = {}
+        if uprn_params:
+            api_response = self._get_epc(params=uprn_params, size=size)
+            if api_response["msg"]["status"] == 200:
+                data["rows"].extend(api_response["response"]["rows"])
+
+        # We also search on the address, regardless of the uprn result. We find that
+        # properties are sometimes listed under the wrong UPRN
+        if address_params:
+            api_response = self._get_epc(params=address_params, size=size)
+            if api_response["msg"]["status"] == 200:
+                # We update the data with the correct uprn
+                if self.uprn:
+                    for x in api_response["response"]["rows"]:
+                        x["uprn"] = self.uprn
+
+                data["rows"].extend(api_response["response"]["rows"])
+
+        # We now de-dupe on lmk-key, since both searches can return the same certificate
+        seen = set()
+        data["rows"] = [
+            row for row in data["rows"]
+            if row["lmk-key"] not in seen and not seen.add(row["lmk-key"])
+        ]
+
+        if data["rows"]:
+            # Store the merged rows - otherwise self.data only holds the last raw response that
+            # _get_epc received, and the rows from the other search are lost
+            self.data = data
+            api_response["msg"] = self.SUCCESS
+
+        return api_response["msg"]
+
    def filter_rows(self, rows, property_type=None, address=None):
        """
        This method should not be used when property_type and address are both not None
@ -343,8 +395,12 @@ class SearchEpc:
                rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
            else:
                best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
+                # Get the UPRN for the best match
+                best_match_uprn = {r["uprn"] for r in rows if r["address"] == best_match[0]}.pop()
                # Get all of the scores
-                rows_filtered = [r for r in rows if r["address"] == best_match[0]]
+                rows_filtered = [
+                    r for r in rows if (r["address"] == best_match[0]) or (r["uprn"] == best_match_uprn)
+                ]

            if rows_filtered:
                return rows_filtered
@ -643,6 +699,7 @@ class SearchEpc:
            estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
            estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
            estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
+
            if vartype == "Int64":
                # We have some edge cases where we get the error "invalid literal for int() with base 10: '1.0'"
                # so this handles this
@ -654,6 +711,13 @@ class SearchEpc:
                estimated_epc[key] = None
                continue

+            if key == "floor-height":
+                # We specifically handle this, to avoid extreme values
+                # We check if we have any rows less than 3.5m
+                if estimation_data[estimation_data["floor-height"].astype(float) <= 3.5].shape[0] > 0:
+                    # Perform the filter
+                    estimation_data = estimation_data[estimation_data["floor-height"].astype(float) <= 3.5]
+
            if vartype == "Int64":
                estimated_value = self._estimate_int(estimation_data, key)
            elif vartype == "float":
@ -676,7 +740,30 @@ class SearchEpc:

        estimated_epc["current-energy-rating"] = sap_to_epc(estimated_epc["current-energy-efficiency"])

+        # Convert the cost current and potential variables - to string integers
+        for variable in ["heating-cost-current", "hot-water-cost-current", "lighting-cost-current",
+                         "heating-cost-potential", "hot-water-cost-potential", "lighting-cost-potential"]:
+            estimated_epc[variable] = str(int(estimated_epc[variable]))
+
+        # This is a string
+        estimated_epc["low-energy-fixed-light-count"] = (
+            str(estimated_epc["low-energy-fixed-light-count"]) if estimated_epc["low-energy-fixed-light-count"] else ""
+        )
+        # This is an int
+        estimated_epc["photo-supply"] = (
+            int(np.round(estimated_epc["photo-supply"])) if estimated_epc["photo-supply"] else estimated_epc[
+                "photo-supply"]
+        )
+
+        estimated_epc["co2-emiss-curr-per-floor-area"] = (
+            estimated_epc["co2-emissions-current"] / estimated_epc["total-floor-area"]
+        )
+
        estimated_epc["postcode"] = self.postcode
+        if not self.uprn:
+            # Update self.uprn too. NOTE(review): str hash() is salted per process, so this id varies between runs
+            self.uprn = hash(self.address1 + self.postcode)
+
        estimated_epc["uprn"] = self.uprn
        estimated_epc["address"] = self.full_address
        # Indicate that this epc was estimated
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@ -9,8 +9,7 @@ from tqdm import tqdm
 from math import sin, cos, sqrt, atan2, radians

 from utils.logger import setup_logger
-from recommendations.Costs import Costs, MCS_SOLAR_PV_COST_DATA
-from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
+from recommendations.Costs import Costs
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 from backend.Property import Property
 from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data
@ -51,6 +50,16 @@ class GoogleSolarApi:
    MIN_UNIT_PANELS = 4  # Minimum number of panels we allow for a domestic building
    MIN_BUILDING_PANELS = 10  # Minimum number of panels we allow for a block of flats

+    # Max area of a roof space we allow panels for
+    PERCENTAGE_OF_ROOF_LIMIT = 0.8
+
+    # If the roof area that comes back from the solar API differs by more than 25% from the estimated roof area
+    # that we calculate based on the property dimensions, we will correct the roof area
+    ROOF_AREA_TOLERANCE = 1.25
+
+    # Error Messages
+    ENTITY_NOT_FOUND_ERROR = 'Requested entity was not found.'
+
    def __init__(self, api_key, max_retries=5):
        """
        Initialize the GoogleSolarApi class with the provided API key and maximum retries.
@ -109,6 +118,13 @@ class GoogleSolarApi:
                response.raise_for_status()  # Raise an error for bad status codes
                return response.json()
            except requests.exceptions.RequestException as e:
+                if (
+                    (e.response.status_code == 404) &
+                    (e.response.json()["error"]["message"] == self.ENTITY_NOT_FOUND_ERROR)
+                ):
+                    logger.warning("No building insights found for the given location.")
+                    return {"error": self.ENTITY_NOT_FOUND_ERROR}
+
                attempt += 1
                print(f"Attempt {attempt} failed: {e}")
                time.sleep(2 ** attempt)  # Exponential backoff
@ -152,6 +168,10 @@ class GoogleSolarApi:
        # If we have no data in the db, or updated_at is more than 6 months
        if self.insights_data is None or is_outdated:
            self.insights_data = self.get_building_insights(longitude, latitude, required_quality)
+            if self.insights_data.get("error") == self.ENTITY_NOT_FOUND_ERROR:
+                # We use default performance since in this case, we couldn't retrieve data. We don't store
+                self.panel_performance = self.default_panel_performance(property_instance=property_instance)
+                return
            self.need_to_store = True

        # Extract key data from the insights response
@ -159,12 +179,19 @@ class GoogleSolarApi:
        # Automatically exclude north-facing segments
        self.exclude_north_facing_segments(property_instance=property_instance)
        # If a property is semi-detached, it's possible for us to include segments from an attached unit
-        if (property_instance.data["built-form"] == "Semi-Detached") and (
-            property_instance.data["extension-count"] == 0
-        ):
-            self.exclude_likely_duplicate_surfaces()
+        if property_instance is not None:
+            if (property_instance.data["built-form"] == "Semi-Detached") and (
+                property_instance.data["extension-count"] == 0
+            ):
+                self.exclude_likely_duplicate_surfaces()

+        # We constrain the roof area, based on the floor area to be more conservative
        self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
+        if (
+            self.roof_area > property_instance.roof_area * self.ROOF_AREA_TOLERANCE
+        ) | (self.roof_area < (2 - self.ROOF_AREA_TOLERANCE) * property_instance.roof_area):
+            self.roof_area = property_instance.roof_area
+
        self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2']
        self.panel_wattage = self.insights_data["solarPotential"]["panelCapacityWatts"]
        if self.panel_wattage != 400:
@ -179,7 +206,9 @@ class GoogleSolarApi:

        # We now start finding the solar panel configurations
        self.optimise_solar_configuration(
-            energy_consumption=energy_consumption, is_building=is_building, property_instance=property_instance
+            energy_consumption=energy_consumption,
+            is_building=is_building,
+            property_instance=property_instance
        )

        # Finally, if we have a double property, we half the data we stored area
@ -259,8 +288,6 @@ class GoogleSolarApi:
        # minimum is 4
        min_panels = self.MIN_BUILDING_PANELS if is_building else self.MIN_UNIT_PANELS

-        cost_instance = Costs(property_instance=property_instance) if property_instance is not None else None
-
        # Remove any north facing roof segments
        panel_performance = []
        for config in self.insights_data["solarPotential"].get("solarPanelConfigs", []):
@ -294,14 +321,12 @@ class GoogleSolarApi:
            if roi_summary["n_panels"].sum() < min_panels:
                continue

-            if cost_instance is None:
-                total_cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
-            else:
-                total_cost = cost_instance.solar_pv(
-                    n_panels=roi_summary["n_panels"].sum(),
-                    has_battery=False,
-                    n_floors=property_instance.number_of_floors,
-                )["total"]
+            total_cost = Costs.solar_pv(
+                n_panels=roi_summary["n_panels"].sum(),
+                has_battery=False,
+                # Assume the most amount of scaffolding
+                n_floors=3 if property_instance is None else property_instance.number_of_floors
+            )["total"]

            weighted_ratio = np.average(
                roi_summary["ratio"].values, weights=roi_summary["generated_dc_energy"].values
@ -491,6 +516,11 @@ class GoogleSolarApi:
            panel_performance = panel_performance.drop(columns=["n_panels_halved"])
            panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels]

+        # Finally, we prevent the panelled roof area from exceeding a set fraction of the roof area
+        panel_performance = panel_performance[
+            panel_performance["panneled_roof_area"] <= self.roof_area * self.PERCENTAGE_OF_ROOF_LIMIT
+            ]
+
        self.panel_performance = panel_performance

    def exclude_north_facing_segments(self, property_instance):
@ -792,15 +822,19 @@ class GoogleSolarApi:
            property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
            # At this level, we check if the property is suitable for solar and if now, skip
            # Or if we have a solar non-invasive recommendation
+
+            non_invasive_rec = next(
+                (r for r in property_instance.non_invasive_recommendations if r["type"] == "solar_pv"), {}
+            ).get("array_wattage")
+
            if (
                (not property_instance.is_solar_pv_valid()) or
-                [r for r in property_instance.non_invasive_recommendations if r["type"] == "solar_pv"]
+                non_invasive_rec is not None
            ):
                continue

            if unit["longitude"] is None or unit["latitude"] is None:
                # At this point, we've checked that solar PV is valid, and so we provide some defaults
-
                property_instance.set_solar_panel_configuration(
                    solar_panel_configuration={
                        "insights_data": None,
@ -855,19 +889,19 @@ class GoogleSolarApi:

        cost_instance = Costs(property_instance=property_instance)

-        # We return a 2.4 and 4 kwp system
+        # We return a 1.6 and 3.2 kwp system
        panel_performance = pd.DataFrame(
            [
                {
-                    'n_panels': 10,
-                    'yearly_dc_energy': 4000 * 0.99,  # Assumed 99% efficient wattage -> dc
+                    'n_panels': 8,
+                    'yearly_dc_energy': 3200 * assumptions.MEDIAN_WATTAGE_TO_DC,
                    'total_cost': cost_instance.solar_pv(
-                        n_panels=10, has_battery=False, n_floors=property_instance.number_of_floors
+                        n_panels=8, has_battery=False, n_floors=property_instance.number_of_floors
                    )["total"],
                    'weighted_ratio': None,
-                    'panneled_roof_area': 10 * assumptions.RDSAP_AREA_PER_PANEL,
-                    'array_wattage': 4000,
-                    'initial_ac_kwh_per_year': 4000 * 0.95,  # Assumed 95% efficient wattage -> ac
+                    'panneled_roof_area': 8 * assumptions.RDSAP_AREA_PER_PANEL,
+                    'array_wattage': 3200,
+                    'initial_ac_kwh_per_year': 3200 * assumptions.MEDIAN_WATTAGE_TO_AC,
                    'lifetime_ac_kwh': None,
                    'lifetime_dc_kwh': None,
                    'roi': None,
@ -879,15 +913,15 @@ class GoogleSolarApi:
                    'rank': None
                },
                {
-                    'n_panels': 6,
-                    'yearly_dc_energy': 2400 * 0.99,  # Assumed 99% efficient wattage -> dc
+                    'n_panels': 4,
+                    'yearly_dc_energy': 1600 * assumptions.MEDIAN_WATTAGE_TO_DC,
                    'total_cost': cost_instance.solar_pv(
                        n_panels=6, has_battery=False, n_floors=property_instance.number_of_floors
                    )["total"],
                    'weighted_ratio': None,
-                    'panneled_roof_area': 6 * assumptions.RDSAP_AREA_PER_PANEL,
-                    'array_wattage': 2400,
-                    'initial_ac_kwh_per_year': 2400 * 0.95,  # Assumed 95% efficient wattage -> ac
+                    'panneled_roof_area': 4 * assumptions.RDSAP_AREA_PER_PANEL,
+                    'array_wattage': 1600,
+                    'initial_ac_kwh_per_year': 1600 * assumptions.MEDIAN_WATTAGE_TO_AC,
                    'lifetime_ac_kwh': None,
                    'lifetime_dc_kwh': None,
                    'roi': None,
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@ -1,7 +1,7 @@
-# Assumes that the average efficiency of an air source heat pump is 250%, taking the median of the 200-400% range,
-# which is often quoted as a sensible efficiency range for air source heat pumps.
+# We assume that the ASHP efficiency is 280%, which is the minimum that Cotswolds Energy Group achieves, as
+# they target this
 PESSIMISTIC_ASHP_EFFICIENCY = 200
-AVERAGE_ASHP_EFFICIENCY = 250
+AVERAGE_ASHP_EFFICIENCY = 280

 # Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
 # be exported. These are averages based on Google research. E.g
@ -11,9 +11,15 @@ SOLAR_CONSUMPTION_WITH_BATTERY_PROPORTION = 0.7

 # Typically, each solar panel takes up around 3.4 m2 of roof space under RdSAP. This was been verified in Elmhurst
 RDSAP_AREA_PER_PANEL = 3.4
+# This is a median based on a sample of properties
+MEDIAN_WATTAGE_TO_AC = 0.965
+MEDIAN_WATTAGE_TO_DC = 0.99

 SOCIAL_TENURES = ["Rented (social)", "rental (social)"]

+# Carbon intensity of electricity, as of 16th Jan 2025
+ELECTRICITY_CARBON_INTENSITY = 0.232
+
 DESCRIPTIONS_TO_FUEL_TYPES = {
    "Air source heat pump, radiators, electric": {
        "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
@ -50,4 +56,12 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
    },
    "Gas instantaneous at point of use": {"fuel": "Natural Gas", "cop": 0.85},
    "Room heaters, wood logs": {"fuel": "Wood Logs", "cop": 1},
+    "Boiler and radiators, coal": {"fuel": "Coal", "cop": 0.85},
+    "From main system, no cylinderstat": {"fuel": "Natural Gas", "cop": 0.85},
 }
+
+# These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
+# if one of these has been recommended.
+measures_needing_ventilation = [
+    "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
+]
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@ -138,7 +138,7 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
                "recommendation_id": recommendation_id,
                "material_id": part["id"],
                "depth": int(part["depth"]) if part["depth"] else None,
-                "quantity": part["quantity"],
+                "quantity": float(part["quantity"]),
                "quantity_unit": part["quantity_unit"],
                "estimated_cost": part["total"],
            }
--- a/backend/app/db/models/materials.py
+++ b/backend/app/db/models/materials.py
@ -19,6 +19,7 @@ class MaterialType(enum.Enum):
    flat_roof_insulation = "flat_roof_insulation"
    room_roof_insulation = "room_roof_insulation"
    windows_glazing = "windows_glazing"
+    cavity_wall_extraction = "cavity_wall_extraction"

    iwi_wall_demolition = "iwi_wall_demolition"
    iwi_vapour_barrier = "iwi_vapour_barrier"
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -1,3 +1,4 @@
+import ast
 import json
 from datetime import datetime

@ -27,9 +28,11 @@ from backend.app.dependencies import validate_token
 from backend.app.plan.schemas import PlanTriggerRequest
 from backend.app.plan.utils import get_cleaned
 from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
+import backend.app.assumptions as assumptions

 from backend.ml_models.api import ModelApi
 from backend.Property import Property
+from backend.Funding import Funding
 from backend.apis.GoogleSolarApi import GoogleSolarApi

 from recommendations.optimiser.CostOptimiser import CostOptimiser
@ -42,6 +45,7 @@ from backend.ml_models.Valuation import PropertyValuation

 from etl.bill_savings.KwhData import KwhData
 from etl.spatial.OpenUprnClient import OpenUprnClient
+from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc

 logger = setup_logger()

@ -120,7 +124,7 @@ def extract_portfolio_aggregation_data(
        # We can now calculate multiple outputs based on default recommendations
        carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations])

-        pre_retrofit_co2 = p.data["co2-emissions-current"]
+        pre_retrofit_co2 = p.energy["co2_emissions"]
        post_retrofit_co2 = pre_retrofit_co2 - carbon_savings

        pre_retrofit_energy_bill = sum(p.current_energy_bill.values())
@ -337,7 +341,10 @@ def extract_property_request_data(

    # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN
    # we need to check existence of uprn
-    has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else True
+    has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False
+    if has_uprn:
+        has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None]
+
    if has_uprn:
        property_non_invasive_recommendations = next((
            x for x in non_invasive_recommendations if
@ -352,7 +359,6 @@ def extract_property_request_data(
        ), {})

    if isinstance(property_non_invasive_recommendations.get("recommendations"), str):
-        import ast
        property_non_invasive_recommendations["recommendations"] = ast.literal_eval(
            property_non_invasive_recommendations["recommendations"]
        )
@ -363,16 +369,49 @@ def extract_property_request_data(
            else:
                transformed.append(rec)

-        property_non_invasive_recommendations["recommendations"] = str(transformed)
+        property_non_invasive_recommendations["recommendations"] = transformed

-    property_valution = next((
-        float(x["value"]) for x in valuation_data if
-        (str(x["uprn"]) == str(uprn))
-    ), None)
+    # Check if the valuation data has uprn
+    valuation_has_uprn = "uprn" in valuation_data[0] if valuation_data else False
+    if valuation_has_uprn:
+        valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None]
+
+    if valuation_has_uprn:
+        property_valution = next((
+            float(x["valuation"]) for x in valuation_data if
+            (str(x["uprn"]) == str(uprn))
+        ), None)
+    else:
+        property_valution = next((
+            float(x["valuation"]) for x in valuation_data if
+            (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+        ), None)

    return patch, property_already_installed, property_non_invasive_recommendations, property_valution


+def get_funding_data():
+    """
+    Read the two funding reference datasets from the data bucket: the ECO4 full project scores
+    matrix and the warm homes local grant (WHLG) eligible postcodes.
+    :return: (project scores matrix, whlg eligible postcodes), both as pandas DataFrames
+    """
+    scores_columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings']
+
+    scores_rows = read_csv_from_s3(
+        bucket_name=get_settings().DATA_BUCKET, filepath="funding/ECO4 Full Project Scores Matrix.csv"
+    )
+    scores_frame = pd.DataFrame(scores_rows)
+    # Columns are renamed positionally, so the CSV is expected to have exactly these four columns
+    scores_frame.columns = scores_columns
+    scores_frame["Cost Savings"] = scores_frame["Cost Savings"].astype(float)
+
+    postcode_rows = read_csv_from_s3(
+        bucket_name=get_settings().DATA_BUCKET, filepath="funding/whlg eligible postcodes.csv"
+    )
+
+    return scores_frame, pd.DataFrame(postcode_rows)
+
+
 router = APIRouter(
    prefix="/plan",
    tags=["plan"],
@ -393,6 +432,14 @@ async def trigger_plan(body: PlanTriggerRequest):
        session.begin()
        logger.info("Getting the inputs")
        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
+        # Check for duplicate UPRNS
+        input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")]
+
+        if input_uprns:
+            # Check for dupes
+            if len(input_uprns) != len(set(input_uprns)):
+                raise ValueError("Duplicate UPRNs in the input data")
+
        # If we have patches or overrides, we should read them in here
        patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body)

@ -424,13 +471,22 @@ async def trigger_plan(body: PlanTriggerRequest):

            # Create a record in db
            property_id, is_new = create_property(
-                session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean,
-                epc_searcher.uprn,
-                energy_assessment
+                session=session,
+                portfolio_id=body.portfolio_id,
+                address=epc_searcher.address_clean,
+                postcode=epc_searcher.postcode_clean,
+                uprn=epc_searcher.uprn,
+                energy_assessment=energy_assessment
            )
            if not is_new and not body.multi_plan:
                continue

+            if epc_searcher.newest_epc is None:
+                raise ValueError(
+                    "No EPCs found for this property and did not estimate - likely need to provide a"
+                    "property type and built form"
+                )
+
            if is_new:
                create_property_targets(
                    session,
@ -459,6 +515,14 @@ async def trigger_plan(body: PlanTriggerRequest):
                )
            )

+            # If the event type is a remote assessment, we pull the additional find my epc data and include it
+            if body.event_type == "remote_assessment":
+                logger.info("Retrieving find my epc data")
+                property_non_invasive_recommendations = RetrieveFindMyEpc.get_from_epc(
+                    epc_searcher.newest_epc
+                )
+                # TODO: We need to determine if we should make a patch, if the EPC is new
+
            epc_records = patch_epc(patch, epc_records)

            prepared_epc = EPCRecord(
@ -489,7 +553,8 @@ async def trigger_plan(body: PlanTriggerRequest):
        model_api = ModelApi(
            portfolio_id=body.portfolio_id,
            timestamp=created_at,
-            prediction_buckets=get_prediction_buckets()
+            prediction_buckets=get_prediction_buckets(),
+            max_retries=1
        )
        await model_api.async_warm_up_lambdas(
            model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES
@ -501,6 +566,7 @@ async def trigger_plan(body: PlanTriggerRequest):
        logger.info("Reading in materials and cleaned datasets")
        materials = get_materials(session)
        cleaned = get_cleaned()
+        eco_project_scores_matrix, whlg_eligible_postcodes = get_funding_data()

        kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)

@ -584,8 +650,10 @@ async def trigger_plan(body: PlanTriggerRequest):
        recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)

        recommendations_scoring_data = recommendations_scoring_data.drop(
-            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
-                     "carbon_ending"]
+            columns=[
+                "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                "carbon_ending"
+            ]
        )

        all_predictions = await model_api.async_paginated_predictions(
@ -604,6 +672,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                    property_instance=property_instance,
                    all_predictions=all_predictions,
                    recommendations=recommendations,
+                    representative_recommendations=representative_recommendations
                )
            )

@ -625,8 +694,6 @@ async def trigger_plan(body: PlanTriggerRequest):
        )

        # We now insert kwh estimates and costs into the recommendations
-        # TODO: We should join the methodology which maps the heating and hot water descriptions to the fuel types in
-        #       Recommendations, but also the Property class
        logger.info("Calculating tenant savings - kwh and bills")
        for property_id in tqdm([p.id for p in input_properties]):
            property_recommendations = recommendations.get(property_id, [])
@ -636,59 +703,130 @@ async def trigger_plan(body: PlanTriggerRequest):
                Recommendations.calculate_recommendation_tenant_savings(
                    property_instance=property_instance,
                    kwh_simulation_predictions=kwh_simulation_predictions,
-                    property_recommendations=property_recommendations
+                    property_recommendations=property_recommendations,
+                    ashp_cop=body.ashp_cop
                )
            )
            property_instance.current_energy_bill = property_current_energy_bill

        # Insert the predictions into the recommendations and run the optimiser
-        # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
-        #       possibility with heating system
-        # TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
-        #      cylinder jacket), we should add these to the recommendations as default
-
        for p in input_properties:
            if not recommendations.get(p.id):
                continue

-            input_measures = prepare_input_measures(recommendations[p.id], body.goal)
+            # we need to double unlist because we have a list of lists
+            property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs}

-            current_sap_points = int(p.data["current-energy-efficiency"])
-            target_sap_points = epc_to_sap_lower_bound(body.goal_value)
-            sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)
+            property_required_measures = [
+                m for m in recommendations[p.id] if m[0]["type"] in body.required_measures
+            ]
+            measures_to_optimise = [
+                m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures
+            ]

-            if not body.optimise:
-                if body.goal != "Increasing EPC":
-                    raise NotImplementedError("Only EPC optimisation is currently supported")
+            # If we have a wall insulation measure, we MUST include mechanical ventilation
+            # Additionally, if we have required measures, they should also be included. Therefore
+            # we can discount the number of points required to get to the target SAP band (or increase)
+            # in the case of ventilation
+            needs_ventilation = any(x in property_measure_types for x in assumptions.measures_needing_ventilation)
+
+            input_measures = prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation)
+
+            if not input_measures[0]:
+                # No defaults to optimise. This must be a set (`{}` is a dict): required measures are .add()-ed below
+                selected_recommendations = set()
                solution = []
-                for sub_list in input_measures:
-                    # Select the entry with the highest gain, and if tied, choose the one with the lowest cost
-                    best_measure = max(sub_list, key=lambda x: (x['gain'], -x['cost']))
-                    solution.append(best_measure)
            else:

-                if body.budget:
-                    optimiser = GainOptimiser(
-                        input_measures, max_cost=body.budget, max_gain=sap_gain if sap_gain > 0 else 0
+                fixed_gain = 0
+                if property_required_measures:
+                    # We get the SAP points for the required measures
+                    if body.goal != "Increasing EPC":
+                        raise NotImplementedError("Only EPC optimisation is currently supported")
+                    sap_by_type = [
+                        {"type": rec["type"], "sap_points": rec["sap_points"]} for recs in property_required_measures
+                        for rec in recs
+                    ]
+                    # We get a MAX sap points per type
+                    max_per_type = (
+                        pd.DataFrame(sap_by_type).groupby("type")["sap_points"].max().to_dict()
                    )
+                    fixed_gain = sum(max_per_type.values())
+
+                    property_required_measure_types = {rec["type"] for rec in sap_by_type}
+
+                    # if the property needs ventilation, but the measures we optimise didn't include
+                    # ventilation, we add the points for ventilation as a fixed gain
+                    if needs_ventilation and any(
+                        r in property_required_measure_types for r in assumptions.measures_needing_ventilation
+                    ):
+                        fixed_gain += next(
+                            (r[0]["sap_points"] for r in recommendations[p.id] if
+                             r[0]["type"] == "mechanical_ventilation"),
+                            0
+                        )
+
+                current_sap_points = int(p.data["current-energy-efficiency"])
+
+                sap_gain = CostOptimiser.calculate_sap_gain_with_slack(
+                    epc_to_sap_lower_bound(body.goal_value) - current_sap_points
+                ) - fixed_gain
+
+                if not body.optimise:
+                    if body.goal != "Increasing EPC":
+                        raise NotImplementedError("Only EPC optimisation is currently supported")
+                    solution = []
+                    for sub_list in input_measures:
+                        # Select the entry with the highest gain, and if tied, choose the one with the lowest cost
+                        best_measure = max(sub_list, key=lambda x: (x['gain'], -x['cost']))
+                        solution.append(best_measure)
                else:
-                    # The minimum gain is the minimum number of SAP points required to get to the target SAP band
-                    # If the gain is negative, the optimiser will return an empty solution
-                    optimiser = CostOptimiser(
-                        input_measures,
-                        min_gain=sap_gain
-                    )

-                optimiser.setup()
-                optimiser.solve()
-                solution = optimiser.solution
+                    if body.budget:
+                        optimiser = GainOptimiser(
+                            input_measures, max_cost=body.budget, max_gain=sap_gain if sap_gain > 0 else 0
+                        )
+                    else:
+                        # The minimum gain is the minimum number of SAP points required to get to the target SAP band
+                        # If the gain is negative, the optimiser will return an empty solution
+                        optimiser = CostOptimiser(
+                            input_measures,
+                            min_gain=sap_gain
+                        )

-            selected_recommendations = {r["id"] for r in solution}
+                    optimiser.setup()
+                    optimiser.solve()
+                    solution = optimiser.solution
+
+                selected_recommendations = {r["id"] for r in solution}
+
+            if property_required_measures:
+                # We select the cheapest of the required measures, into selected
+                for recs in property_required_measures:
+                    # We select the cheapest of the required measures
+                    cost_to_id = {
+                        rec["recommendation_id"]: rec["total"] for rec in recs
+                        if rec["recommendation_id"] not in selected_recommendations
+                    }
+                    # Take the recommendation id with the lowest cost
+
+                    selected_recommendations.add(min(cost_to_id, key=cost_to_id.get))
+                    # Update the solution with the selected recommendations
+                    solution = []
+                    for recs in recommendations[p.id]:
+                        for rec in recs:
+                            if rec["recommendation_id"] in selected_recommendations:
+                                solution.append(
+                                    {
+                                        "id": rec["recommendation_id"],
+                                        "cost": rec["total"],
+                                        "gain": rec["sap_points"],
+                                        "type": rec["type"]
+                                    }
+                                )

            # If wall insulation is selected, we also include mechanical ventilation as a best practice measure
-            if any(x in [r["type"] for r in solution] for x in [
-                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
-            ]):
+            if any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation):
                ventilation_rec = next(
                    (r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
                    None
@ -717,10 +855,57 @@ async def trigger_plan(body: PlanTriggerRequest):
            ]

            # We'll also unlist the recommendations so they're a bit easier to handle from here onwards
-            final_recommendations = [
+            recommendations[p.id] = [
                rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type
            ]
-            recommendations[p.id] = final_recommendations
+
+        # when we have buildings, we tweak our solar PV recommendations as if one unit needs it, we apply it to all
+        # of them
+        # TODO: We can probably do better and optimise at the building level - this is temp
+        logger.info("Adjusting solar PV recommendations for buildings")
+        building_ids = {p.building_id for p in input_properties if p.building_id is not None}
+
+        for bid in building_ids:
+            building = [p for p in input_properties if p.building_id == bid]
+
+            # We check if any unit has solar PV as a default recommendation. A unit with no
+            # recommendations may have no entry in the dict at all, so we look it up with .get()
+            # rather than indexing, which would raise a KeyError
+            has_solar = any(
+                rec["default"] and rec["type"] == "solar_pv"
+                for unit in building
+                for rec in recommendations.get(unit.id, [])
+            )
+            if not has_solar:
+                continue
+
+            # We adjust the units within the building
+            for unit in building:
+                for rec in recommendations.get(unit.id, []):
+                    if rec["type"] == "solar_pv":
+                        # This is straightforward, we just set the default to True, since when we're at a building
+                        # level, we only allow 1 solar PV option for each unit. If we change this, this logic will
+                        # need to be updated
+                        rec["default"] = True
+
+        # ~~~~~~~~~~~~~~~~
+        # Funding
+        # ~~~~~~~~~~~~~~~~
+
+        # for p in input_properties:
+        #     funding_calulator = Funding(
+        #         tenure=body.housing_type,
+        #         starting_epc=p.data["current-energy-rating"],
+        #         starting_sap=int(p.data["current-energy-efficiency"]),
+        #         postcode=p.postcode,
+        #         floor_area=p.floor_area,
+        #         council_tax_band=None,  # This is seemingly always None at the moment
+        #         property_recommendations=recommendations[p.id],
+        #         project_scores_matrix=eco_project_scores_matrix,
+        #         whlg_eligible_postcodes=whlg_eligible_postcodes,
+        #         gbis_abs_rate=15,
+        #         eco4_abs_rate=15,
+        #     )
+        #     funding_calulator.check_eligibiltiy()
+        #     # Insert funding
+        #     p.insert_funding(funding_calulator)

        logger.info("Uploading recommendations to the database")
        # If we have any work to do, we create a new scenario
@ -759,7 +944,11 @@ async def trigger_plan(body: PlanTriggerRequest):
                    new_epc = sap_to_epc(new_sap_points)
                    new_epc_bands[p.id] = new_epc

-                    valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
+                    total_cost = sum([r["total"] for r in default_recommendations])
+
+                    valuations = PropertyValuation.estimate(
+                        property_instance=p, target_epc=new_epc, total_cost=total_cost
+                    )
                    property_value_increase_ranges[p.id] = valuations

                    if p.is_new:
@ -844,6 +1033,7 @@ async def trigger_plan(body: PlanTriggerRequest):

        # Commit final changes
        session.commit()
+
    except IntegrityError:
        logger.error("Database integrity error occurred", exc_info=True)
        session.rollback()
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@ -37,6 +37,7 @@ MEASURE_MAP = {

 VALID_GOALS = ["Increasing EPC"]
 VALID_HOUSING_TYPES = ["Social", "Private"]
+VALID_EVENT_TYPES = ["remote_assessment"]


 # Define the validation function for inclusions/exclusions
@ -56,10 +57,16 @@ def check_housing_type(value: str) -> str:
    return value


+def check_event_type(value: str) -> str:
+    """Check that ``value`` is one of VALID_EVENT_TYPES and return it unchanged."""
+    assert value in VALID_EVENT_TYPES, f"{value} is not a valid event type"
+    return value
+
+
 # Use Annotated with BeforeValidator for each list item validation
 InclusionOrExclusionItem = Annotated[str, BeforeValidator(check_inclusion_or_exclusion)]
 Goal = Annotated[str, BeforeValidator(check_goals)]
 HousingType = Annotated[str, BeforeValidator(check_housing_type)]
+EventType = Annotated[str, BeforeValidator(check_event_type)]


 class PlanTriggerRequest(BaseModel):
@ -75,8 +82,17 @@ class PlanTriggerRequest(BaseModel):
    valuation_file_path: Optional[str] = None
    exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
    inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
+    # This is a list of measures that we want to be included, if they are options
+    # Default to empty
+    required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=[], min_length=1)

    scenario_name: Optional[str] = ""
    multi_plan: Optional[bool] = False
    optimise: Optional[bool] = True
    default_u_values: Optional[bool] = True
+
+    ashp_cop: Optional[float] = 2.8
+
+    # When performing a remote assessment, if this has been set, it will allow the engine to
+    # pull data from the find my epc website, to utilise as part of a remote assessment
+    event_type: Optional[EventType] = "remote_assessment"
--- a/backend/app/plan/utils.py
+++ b/backend/app/plan/utils.py
@ -1,9 +1,5 @@
-import pandas as pd
-from backend.Property import Property
 from utils.s3 import read_from_s3

-from recommendations.recommendation_utils import get_wall_u_value, get_floor_u_value, get_roof_u_value
-
 from backend.app.config import get_settings
 import msgpack

--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@ -28,8 +28,8 @@ class AnnualBillSavings:

    # Latest price cap figures from Ofgem are for April 2024
    # https://www.ofgem.gov.uk/energy-price-cap
-    ELECTRICITY_PRICE_CAP = 0.2236
-    GAS_PRICE_CAP = 0.0548
+    ELECTRICITY_PRICE_CAP = 0.2486
+    GAS_PRICE_CAP = 0.0634
    # This is the most recent export payment figure, at 9.28p/kWh
    # Smart export guarantee rates can be found here:
    # https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates
@ -39,8 +39,8 @@ class AnnualBillSavings:
    PRICE_FACTOR = 0.09549999999999999

    # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
-    DAILY_STANDARD_CHARGE_GAS = 0.3143
-    DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
+    DAILY_STANDARD_CHARGE_GAS = 0.3165
+    DAILY_STANDARD_CHARGE_ELECTRICITY = 0.6097

    # Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison
    # For July 2024. These quotes are based on the east midlands region, so we
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@ -1,5 +1,4 @@
 import numpy as np
-from scipy.constants import value


 class PropertyValuation:
@ -203,12 +202,43 @@ class PropertyValuation:
        return msm_increase, lloyds_increase

    @classmethod
-    def estimate(cls, property_instance, target_epc):
+    def estimate(cls, property_instance, target_epc, total_cost=None):
+        """
+        This function estimates the value of a property based on the current EPC rating and the target EPC rating
+        :param property_instance: An instance of the Property class
+        :param target_epc: The target EPC rating
+        :param total_cost: The total cost of the retrofit
+        :return:
+        """
        current_value = (
            property_instance.valuation if property_instance.valuation else
            cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)
        )

+        current_epc = property_instance.data["current-energy-rating"]
+
+        if not current_value:
+            return {
+                "current_value": 0,
+                "lower_bound_increased_value": 0,
+                "upper_bound_increased_value": 0,
+                "average_increased_value": 0,
+                "average_increase": 0
+            }
+
+        return cls.estimate_valuation_improvement(current_value, current_epc, target_epc, total_cost)
+
+    @classmethod
+    def estimate_valuation_improvement(cls, current_value, current_epc, target_epc, total_cost=None):
+        """
+        This function estimates the value of a property based on the current EPC rating and the target EPC rating
+        :param current_value:
+        :param current_epc:
+        :param target_epc:
+        :param total_cost:
+        :return:
+        """
+
        if not current_value:
            return {
                "current_value": 0,
@ -218,7 +248,6 @@ class PropertyValuation:
                "average_increase": 0
            }

-        current_epc = property_instance.data["current-energy-rating"]
        # We get the spectrum of ratings between the current and target EPC
        epc_band_range = cls.EPC_BANDS[cls.EPC_BANDS.index(current_epc): cls.EPC_BANDS.index(target_epc) + 1]

@ -242,6 +271,19 @@ class PropertyValuation:

        avg_increase = np.mean(all_increases)

+        if total_cost is not None:
+            # We CAP the retrofit ROI at 2
+            avg_increase_value = current_value * avg_increase
+            if avg_increase_value / total_cost > 2:
+                # We re-scale the % so that the average value increase is no more than 2 times the total cost
+                double_cost = 2 * total_cost
+                new_avg_increase = double_cost / current_value
+                scalar = new_avg_increase / avg_increase
+                # We scale the min and max increases by the same scalar
+                min_increase *= scalar
+                max_increase *= scalar
+                avg_increase = new_avg_increase
+
        return {
            "current_value": current_value,
            "lower_bound_increased_value": float(current_value * (1 + min_increase)),
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@ -39,6 +39,7 @@ class ModelApi:
        timestamp,
        prediction_buckets,
        base_url="https://api.dev.hestia.homes",
+        max_retries=2,
    ):
        """
        This class handles the communication with the Model APIs. These models include SAP change, heat demain change
@ -54,6 +55,8 @@ class ModelApi:
        self.timestamp = timestamp
        self.prediction_buckets = prediction_buckets

+        self.max_retries = max_retries
+
    @staticmethod
    def predictions_template():
        return {
@ -295,15 +298,33 @@ class ModelApi:

        async def run_batches():
            for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
-                predictions_dict = await self.predict_all_async(
-                    df=data.iloc[chunk:chunk + batch_size],
-                    bucket=bucket,
-                    model_prefixes=model_prefixes,
-                    extract_ids=extract_ids
-                )

-                for key, scored in predictions_dict.items():
-                    all_predictions[key] = pd.concat([all_predictions[key], scored])
+                # One initial attempt plus up to `max_retries` retries
+                total_attempts = self.max_retries + 1
+                attempts = 0
+                success = False
+                while attempts < total_attempts and not success:
+                    try:
+                        predictions_dict = await self.predict_all_async(
+                            df=data.iloc[chunk:chunk + batch_size],
+                            bucket=bucket,
+                            model_prefixes=model_prefixes,
+                            extract_ids=extract_ids
+                        )
+
+                        for key, scored in predictions_dict.items():
+                            all_predictions[key] = pd.concat([all_predictions[key], scored])
+
+                        success = True
+                    except Exception as e:
+                        attempts += 1
+                        logger.error(
+                            f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}/{total_attempts}). "
+                            f"Error: {e}"
+                        )
+
+                        # Best effort: once every attempt has failed the batch is dropped, not re-raised
+                        if attempts >= total_attempts:
+                            logger.error(
+                                f"Skipping batch {chunk}-{chunk + batch_size} after {attempts} failed attempts."
+                            )

        # Check if there is an existing event loop
        try:
--- a/backend/requirements/requirements.txt
+++ b/backend/requirements/requirements.txt
@ -29,3 +29,5 @@ mip==1.15.0
 pyarrow==17.0.0
 fastparquet==2024.5.0
 aiohttp==3.10.10
+# find my epc
+beautifulsoup4
--- a/backend/tests/test_search_epc.py
+++ b/backend/tests/test_search_epc.py
@ -0,0 +1,59 @@
+import pytest
+import os
+from backend.SearchEpc import SearchEpc  # Replace with your actual module name
+from dotenv import load_dotenv
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+class TestSearchEpcIntegration:
+    """Tests for SearchEpc. `test_find_property` is an integration test making actual API calls."""
+
+    @pytest.mark.parametrize(
+        # These names must line up one-to-one with each tuple below and with the test's parameters
+        "address, postcode, uprn, skip_os, lmk_key, n_old_epcs",
+        [
+            # Test case 1: Valid address and postcode, skipping OS
+            # In this case, the property is an individual flat but the uprn associated to the
+            # EPC is for the building as a whole, possibly because there was a conversion of sorts
+            ("Garden Flat, 48 Bedminster Parade", "BS3 4HS", 308249, True,
+             "260907a5431fa073d193cc6bbec51fbf1ba9a61845ab2503f85aa19ce3ed6afd", 1),
+
+            # Test case 2: Another valid address and postcode
+            # In this case, the newest EPC, does not have a uprn associated to it. If we did a search by
+            # uprn, we would get an old EPC
+            ("Flat 8, Hainton House", "DN32 9AQ", 10090082018, True,
+             "bd1149a20a73397184f07a9955f872424826e70f4870c058d71be887766ee1f8", 3),
+
+        ],
+    )
+    def test_find_property(self, address, postcode, uprn, skip_os, lmk_key, n_old_epcs):
+        """
+        Integration test for `find_property`, making actual API calls.
+        """
+        # Provide your actual API keys or tokens here
+        os_api_key = ""
+
+        # Initialize the SearchEpc instance
+        epc_searcher = SearchEpc(
+            address1=address,
+            postcode=postcode,
+            uprn=uprn,
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=os_api_key,
+        )
+
+        # Execute the method
+        epc_searcher.find_property(skip_os=skip_os)
+
+        # We check that we have the correct epc
+        assert epc_searcher.newest_epc["lmk-key"] == lmk_key
+        assert epc_searcher.newest_epc["uprn"] == uprn
+        assert len(epc_searcher.older_epcs) == n_old_epcs
+
+    def test_search_housenumber(self):
+        """Check that the flat identifier is extracted from a full address string."""
+        eg1 = 'Flat A11, Mortimer House, Grendon Road, Exeter'
+        res1 = SearchEpc.get_house_number(eg1, None)
+        assert res1 == "A11"
+
+        eg2 = 'Flat A9, Mortimer House, Grendon Road, Exeter, EX1 2NL'
+        res2 = SearchEpc.get_house_number(eg2, None)
+        assert res2 == "A9"
--- a/etl/access_reporting/app.py
+++ b/etl/access_reporting/app.py
@ -0,0 +1,440 @@
+import os
+from msal import ConfidentialClientApplication
+from datetime import datetime, timedelta
+import requests
+from functools import wraps
+import time
+import logging
+from io import BytesIO
+import pandas as pd
+
+# Configure logging
+logger = logging.getLogger(__name__)
+if not logger.handlers:
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
+def _retry_after_seconds(response, default=5):
+    """
+    Read the Retry-After header as a non-negative number of seconds.
+    Falls back to `default` when the header is missing or is not an integer (e.g. an HTTP-date).
+    """
+    try:
+        return max(0, int(response.headers.get('Retry-After', default)))
+    except (TypeError, ValueError):
+        return default
+
+
+def handle_error(response):
+    """
+    Handle errors based on HTTP status codes and log detailed information.
+
+    :param response: The non-200 response returned by `requests`.
+    :return: 'retry' for 429, 500 and 503 responses, after sleeping for the Retry-After period.
+    :raises ValueError: For every other status code.
+    """
+    try:
+        payload = response.json()
+    except ValueError:
+        payload = {}
+
+    # The body is not guaranteed to be {"error": {...}}; anything else is treated as "no details"
+    error_json = payload.get('error') if isinstance(payload, dict) else None
+    if not isinstance(error_json, dict):
+        error_json = {}
+
+    error_code = error_json.get('code', 'unknownError')
+    error_message = error_json.get('message', 'No detailed error message provided.')
+    inner_error = error_json.get('innererror', {})
+    details = error_json.get('details', [])
+
+    logger.error(f"Error Code: {error_code}")
+    logger.error(f"Error Message: {error_message}")
+    if inner_error:
+        logger.error(f"Inner Error: {inner_error}")
+    if details:
+        logger.error(f"Error Details: {details}")
+
+    status_code = response.status_code
+
+    # Retryable statuses: wait for the period the server asked for, then tell the caller to retry
+    if status_code in (429, 500, 503):
+        retry_after = _retry_after_seconds(response)  # Default to 5 seconds if not provided
+        if status_code == 429:
+            logger.warning(f"Too Many Requests. Retrying after {retry_after} seconds...")
+        else:
+            logger.error(f"Server error. Retrying after {retry_after} seconds...")
+        time.sleep(retry_after)
+        return 'retry'
+
+    if status_code == 401:
+        logger.error("Unauthorized. Token might be invalid.")
+    elif status_code == 403:
+        logger.error("Forbidden. Access denied to the requested resource.")
+    elif status_code == 404:
+        logger.error("Not Found. The requested resource doesn’t exist.")
+
+    raise ValueError(f"API request failed with status code {response.status_code} - {error_message}")
+
+
+def api_call_decorator(func):
+    """
+    Handles various aspects of the API call, including refreshing the access token if needed and handling pagination.
+
+    The decorated method returns a (http_method, url, data) tuple. The wrapper performs the request(s) and
+    returns the response JSON. When the method's `page_size` is truthy, every page is followed through
+    '@odata.nextLink' and the combined items are returned as {'value': [...]}.
+    :param func: The function to be decorated.
+    :return: The wrapped function.
+    """
+    # Local import so that this block does not rely on a new module-level import
+    import inspect
+
+    signature = inspect.signature(func)
+
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        try:
+            # Check and refresh the access token if needed
+            if self.is_access_token_expired():
+                self.retrieve_access_token()
+                logger.info("Access token refreshed.")
+
+            # Get the HTTP method, URL, and optionally data from the function
+            http_method, url, data = func(self, *args, **kwargs)
+
+            # Resolve page_size however it reached the function: keyword, positional, or the function's
+            # own default. Reading kwargs alone silently disabled pagination for the latter two.
+            bound = signature.bind(self, *args, **kwargs)
+            bound.apply_defaults()
+            page_size = bound.arguments.get('page_size', kwargs.get('page_size', None))
+
+            # Initialize the results list and handle pagination if page_size is provided
+            results = []
+            response_data = {}
+            n_calls = 0
+
+            # NOTE(review): a response that keeps asking for a retry (429/500/503) loops here without a limit
+            while url:
+                logger.info("Making call for page: " + str(n_calls + 1))
+                n_calls += 1
+                response = requests.request(http_method, url, headers=self.headers, json=data)
+
+                # Handle the response
+                if response.status_code == 200:
+                    response_json = response.json()  # Store the response JSON
+                    if page_size:
+                        results.extend(response_json.get('value', []))
+                        url = response_json.get('@odata.nextLink', None)
+                        logger.info(f"Next page URL: {url}")
+                    else:
+                        response_data = response_json  # Capture the full response for consistency
+                        break
+                else:
+                    # handle_error either raises or returns 'retry' (after sleeping)
+                    retry = handle_error(response)
+                    if retry == 'retry':
+                        continue
+
+            if page_size:
+                response_data = {'value': results}
+
+            return response_data
+
+        except Exception:
+            logger.exception("An error occurred during the API call.")
+            # Bare raise keeps the original traceback intact
+            raise
+
+    return wrapper
+
+
+class SharePointClient:
+    access_token = None
+    access_token_request_timestamp = None
+    access_token_expiry = None
+    headers = None
+
+    TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
+
+    def __init__(self, tenant_id, client_id, client_secret, site_id, access_token=None,
+                 access_token_expiration_details=None):
+        """
+        Store the credentials, obtain or reuse an access token, then look up the site's document drive.
+        :param tenant_id: The tenant ID.
+        :param client_id: The client ID.
+        :param client_secret: The client secret.
+        :param site_id: The site ID.
+        :param access_token: A previously retrieved token, indexable by 'access_token' (optional)
+        :param access_token_expiration_details: The serialized expiration details for that token; required
+            whenever access_token is given
+        :raises ValueError: If access_token is given without access_token_expiration_details.
+        """
+        self.tenant_id = tenant_id
+        self.client_id = client_id
+        self.client_secret = client_secret
+
+        if not access_token:
+            # Nothing to reuse, so authenticate from scratch
+            self.retrieve_access_token()
+        elif not access_token_expiration_details:
+            raise ValueError("Access token expiration details must be provided.")
+        else:
+            # Reuse the supplied token together with its expiry bookkeeping
+            self.access_token = access_token
+            self.set_access_token_expiration_details(access_token_expiration_details)
+            self.headers = {'Authorization': f"Bearer {self.access_token['access_token']}"}
+
+        # Retrieve static identifiers
+        self.site_id = site_id
+        self.document_drive = self.get_documents_drive()
+
+    def get_token_expiration_details(self):
+        """
+        Serialize the token's request timestamp and expiry as TIMESTAMP_FORMAT strings.
+        :return: Dict with 'access_token_request_timestamp' and 'access_token_expiry' keys.
+        """
+        timestamp_fields = ('access_token_request_timestamp', 'access_token_expiry')
+        return {
+            field: datetime.strftime(getattr(self, field), self.TIMESTAMP_FORMAT)
+            for field in timestamp_fields
+        }
+
+    def set_access_token_expiration_details(self, access_token_expiration_details):
+        """
+        Restore the token's request timestamp and expiry from their serialized form.
+        :param access_token_expiration_details: Dict holding both timestamps as TIMESTAMP_FORMAT strings.
+        :return:
+        """
+        # Parsed and assigned one at a time, request timestamp first
+        for field in ('access_token_request_timestamp', 'access_token_expiry'):
+            parsed = datetime.strptime(access_token_expiration_details[field], self.TIMESTAMP_FORMAT)
+            setattr(self, field, parsed)
+
+    def is_access_token_expired(self):
+        """
+        Reports whether the current time has reached the stored access token expiry.
+        This only checks; it does not fetch a new token itself.
+        :return: True if expired, False otherwise.
+        """
+        current_time = datetime.now()
+        return current_time >= self.access_token_expiry
+
+    def retrieve_access_token(self, refresh=False):
+        """
+        Implements authentication using MSAL and stores the resulting token on the client.
+
+        On success this sets ``self.access_token`` (the full MSAL response), ``self.access_token_request_timestamp``,
+        ``self.access_token_expiry`` and ``self.headers`` (a Bearer ``Authorization`` header).
+
+        :param refresh: If True, force a refresh of the access token.
+        :return: None
+        :raises ValueError: If the MSAL response does not contain an access token.
+        """
+        app = ConfidentialClientApplication(
+            self.client_id,
+            authority=f"https://login.microsoftonline.com/{self.tenant_id}",
+            client_credential=self.client_secret
+        )
+
+        scope = ["https://graph.microsoft.com/.default"]
+
+        # Taken before the request so the computed expiry can only be early, never late
+        access_token_request_timestamp = datetime.now()
+
+        if refresh:
+            logger.info("Forcing refresh of access token.")
+            token = app.acquire_token_for_client(scopes=scope)
+        else:
+            # Check if a token is already cached
+            token = app.acquire_token_silent(scope, account=None)
+
+            if not token:
+                token = app.acquire_token_for_client(scopes=scope)
+
+        if not token or "access_token" not in token:
+            # MSAL reports failures in the response body instead of raising, so surface the details in the log
+            failure = token or {}
+            logger.error(
+                f"Authentication failed. error={failure.get('error')}, "
+                f"error_description={failure.get('error_description')}"
+            )
+            raise ValueError("Authentication failed")
+
+        # Treat the token as expired 20 seconds early so it is not used right at its expiry boundary
+        access_token_expiry = access_token_request_timestamp + timedelta(
+            seconds=token['expires_in'] - 20
+        )
+
+        self.access_token = token
+        self.access_token_request_timestamp = access_token_request_timestamp
+        self.access_token_expiry = access_token_expiry
+        self.headers = {
+            'Authorization': f"Bearer {self.access_token['access_token']}"
+        }
+
+        logger.info("Access token retrieved successfully.")
+
+    @api_call_decorator
+    def get_documents_drive(self):
+        """
+        Describe the Graph request for the drive of the SharePoint site identified by ``self.site_id``.
+
+        NOTE(review): the tuple is presumably consumed by ``api_call_decorator``, which performs the request -
+        confirm against the decorator.
+        :return: Tuple containing HTTP method, URL, and None for data.
+        """
+        url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drive"
+        logger.info(f"Getting document drive from URL: {url}")
+
+        request_spec = ('GET', url, None)
+        return request_spec
+
+    @api_call_decorator
+    def list_folder_contents(self, drive_id, folder_path: str, page_size: int = 100):
+        """
+        Describe the Graph request that lists the children of a folder in a SharePoint drive.
+
+        :param drive_id: The ID of the drive that contains the folder.
+        :param folder_path: The path of the folder, relative to the drive root.
+        :param page_size: Value sent as the ``$top`` query option (default is 100).
+        :return: Tuple containing HTTP method, URL, and None for data.
+        """
+        url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{folder_path}:/children?$top={page_size}"
+        logger.info(f"Listing folder contents from URL: {url}")
+
+        request_spec = ('GET', url, None)
+        return request_spec
+
+    @staticmethod
+    def download_sharepoint_file(download_url):
+        """
+        Downloads a file from the given URL and returns its content.
+
+        :param download_url: The URL to download the file from.
+        :return: A BytesIO holding the downloaded content, positioned at the start.
+        :raises requests.HTTPError: If the server responds with an error status.
+        """
+        file_content = BytesIO()
+
+        # With stream=True the connection is only released once the body is consumed or the response is closed;
+        # the context manager guarantees the close even when the status check or a chunk read raises.
+        with requests.get(download_url, stream=True) as response:
+            response.raise_for_status()  # Check if the request was successful
+
+            # Read the file content into memory
+            for chunk in response.iter_content(chunk_size=8192):
+                file_content.write(chunk)
+
+        file_content.seek(0)  # Reset the file pointer to the beginning
+
+        return file_content
+
+    def download_sharepoint_folder(self, drive_id, folder_path, download_dir, excluded_file_types=None):
+        """
+        Downloads all files in a SharePoint folder, including its subfolders, to the specified local directory.
+
+        The local directory mirrors the SharePoint folder structure. The exclusion list applies at every depth.
+
+        :param drive_id: The ID of the SharePoint drive.
+        :param folder_path: The path of the folder in SharePoint.
+        :param download_dir: The local directory to save the downloaded files.
+        :param excluded_file_types: A list of file types to exclude from download (default is None). Entries are
+            compared with the text after the last "." in the file name, so they must not include the dot.
+        """
+
+        excluded_file_types = [] if excluded_file_types is None else excluded_file_types
+
+        # Ensure the download directory exists
+        os.makedirs(download_dir, exist_ok=True)
+
+        # List folder contents
+        # NOTE(review): only the 'value' of a single listing response is used here; whether further pages are
+        # followed depends on api_call_decorator - confirm for folders with more items than the page size.
+        folder_contents = self.list_folder_contents(drive_id, folder_path)
+        files = folder_contents.get('value', [])
+
+        for item in files:
+            if item.get('folder'):  # Check if it's a folder
+                # Recursively handle subfolders, carrying the exclusion list down so it is honoured at every level
+                subfolder_path = f"{folder_path}/{item['name']}"
+                subfolder_dir = os.path.join(download_dir, item['name'])
+                self.download_sharepoint_folder(
+                    drive_id, subfolder_path, subfolder_dir, excluded_file_types=excluded_file_types
+                )
+            else:
+                # It's a file, download it
+                file_name = item['name']
+                if file_name.split(".")[-1] in excluded_file_types:
+                    continue
+                download_url = item['@microsoft.graph.downloadUrl']
+
+                logger.info(f"Downloading file: {file_name}")
+                file_content = self.download_sharepoint_file(download_url)
+
+                # Save the file locally
+                file_path = os.path.join(download_dir, file_name)
+                with open(file_path, 'wb') as f:
+                    f.write(file_content.read())
+
+                logger.info(f"File saved to: {file_path}")
+
+
+def app():
+    """
+    Pull survey outcome spreadsheets from SharePoint and compute access rates per surveyor and per week.
+
+    For every customer whose configured location is a single ``.xlsx`` file, the file is located in its
+    SharePoint folder, downloaded, and summarised into counts of surveyed / no answer / other outcomes per
+    (week, surveyor), plus a weekly roll-up per funding label. Customers with no location, or with a folder
+    location, are skipped.
+
+    :return: None
+    :raises ValueError: If a configured spreadsheet is not found in its SharePoint folder.
+    """
+    # Customers for WC 18/11/2024
+    #
+    # ----- Eastlight location -----
+    # No data this week, low on data
+    # Housing Associations/Eastlight/Survey Outcomes/
+    #
+    # ----- Settle location -----
+    # No data this week, in separate files
+    # Housing Associations/Settle/Survey Outcomes/
+    #
+    # ----- Community Housing -----
+    # In separate files - will we get to a singular form?
+    # Housing Associations/Community Housing/Survey Outcomes/
+    #
+    # ----- ACIS location -----
+    # Doesn't have this week's data
+    # Housing Asociation/ACIS/Survey Outcomes/ACIS Group - 25.11.2024 - USE THIS.xlsx
+    #
+    # ----- Southern location -----
+    #
+    #
+    # ------ Unitas location ------
+    # Does have this week's data
+    # Unitas location: Housing Associations/Unitas/Survey Outcomes/Unitas.xlsx
+
+    # NOTE(review): the ACIS path starts with "Housing Asociation" while every other customer uses
+    # "Housing Associations" - confirm this matches the real SharePoint folder name.
+    locations = {
+        "Unitas": "Housing Associations/Unitas/Survey Outcomes/Unitas.xlsx",
+        "Eastlight": "Housing Associations/Eastlight/Survey Outcomes/",
+        "Settle": "Housing Associations/Settle/Survey Outcomes/",
+        "Community Housing": "Housing Associations/Community Housing/Survey Outcomes/",
+        "ACIS": "Housing Asociation/ACIS/Survey Outcomes/ACIS Group - 25.11.2024 - USE THIS.xlsx",
+        "Southern": None,
+    }
+
+    SHAREPOINT_CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID", None)
+    SHAREPOINT_CLIENT_SECRET = os.getenv("SHAREPOINT_CLIENT_SECRET", None)
+    SHAREPOINT_TENANT_ID = os.getenv("SHAREPOINT_TENANT_ID", None)
+    WARMFRONT_SHAREPOINT_SITE_ID = os.getenv("WARMFRONT_SHAREPOINT_SITE_ID", None)
+
+    sharepoint_client = SharePointClient(
+        tenant_id=SHAREPOINT_TENANT_ID,
+        client_id=SHAREPOINT_CLIENT_ID,
+        client_secret=SHAREPOINT_CLIENT_SECRET,
+        site_id=WARMFRONT_SHAREPOINT_SITE_ID
+    )
+
+    # NOTE(review): nothing is appended to `results` in the code visible here
+    results = []
+    for customer, location in locations.items():
+        if location is None:
+            continue
+
+        if location.endswith(".xlsx"):
+            # Read in the file
+            # List the contents of the folder
+            location_folder = os.path.dirname(location)
+            contents = sharepoint_client.list_folder_contents(
+                drive_id=sharepoint_client.document_drive["id"],
+                folder_path=location_folder
+            )
+            filepaths = contents["value"]
+
+            download_url = next(
+                (file['@microsoft.graph.downloadUrl'] for file in filepaths
+                 if '@microsoft.graph.downloadUrl' in file and file['name'] == os.path.basename(location)),
+                None
+            )
+
+            if download_url is None:
+                raise ValueError("File not found in the SharePoint folder.")
+
+            file_content = sharepoint_client.download_sharepoint_file(download_url)
+
+            # Convert to pandas dataframe since file is an excel file
+            df = pd.read_excel(file_content)
+            df["Outcome"] = df["Outcome"].str.strip().str.lower()
+
+            # We cannot group by funding type accurately because any job that is not funded will have a NaN value
+            # and therefore we have a 100% access rate for funded jobs and 0% otherwise
+            surveyor_outcomes = []
+            # Grouping on two columns yields two-element keys, so unpack exactly (week, surveyor)
+            for (week, surveyor), group in df.groupby(["Week Commencing", "DEA/REA"]):
+                funding_type = [x for x in group["Funding Type"].unique() if not pd.isnull(x)]
+                if funding_type:
+                    funding_type = " + ".join(funding_type)
+                else:
+                    funding_type = "No Funding"
+                surveyed = group[group["Outcome"] == "surveyed"]
+                no_answer = group[group["Outcome"] == "no answer"]
+                other_issue = group[~group["Outcome"].isin(["surveyed", "no answer"])]
+
+                surveyor_outcomes.append(
+                    {
+                        "Surveyor": surveyor,
+                        "Week": week,
+                        "Funding": funding_type,
+                        "Surveyed": surveyed.shape[0],
+                        "No Answer": no_answer.shape[0],
+                        "Other Issue": other_issue.shape[0],
+                    }
+                )
+
+            if not surveyor_outcomes:
+                # An empty sheet would produce a column-less DataFrame and fail on the "Week" lookup below
+                logger.warning(f"No survey outcomes found for customer: {customer}")
+                continue
+
+            surveyor_outcomes = pd.DataFrame(surveyor_outcomes)
+            surveyor_outcomes["Week"] = pd.to_datetime(surveyor_outcomes["Week"])
+
+            weekly_access = (
+                surveyor_outcomes.drop(columns=["Surveyor"]).groupby(["Week", "Funding"]).sum().reset_index()
+            )
+            # Sort by week and surveyor ascending
+            surveyor_outcomes = surveyor_outcomes.sort_values(["Week", "Surveyor"], ascending=[True, True])
+            surveyor_outcomes["Access Rate"] = 100 * surveyor_outcomes["Surveyed"] / (
+                surveyor_outcomes["Surveyed"] + surveyor_outcomes["No Answer"] + surveyor_outcomes["Other Issue"]
+            )
+
+            weekly_access["Total"] = (
+                weekly_access["Surveyed"] + weekly_access["No Answer"] + weekly_access["Other Issue"]
+            )
+            weekly_access["Access Rate"] = 100 * weekly_access["Surveyed"] / weekly_access["Total"]
--- a/etl/access_reporting/requirements.txt
+++ b/etl/access_reporting/requirements.txt
@ -0,0 +1,11 @@
+python-docx==0.8.11
+PyPDF2==3.0.1
+boto3
+requests
+pandas
+pyarrow==12.0.1
+openpyxl==3.1.2
+usaddress==0.5.10
+pdfplumber==0.10.3
+msgpack==1.0.5
+msal
--- a/etl/costs/app.py
+++ b/etl/costs/app.py
@ -11,7 +11,7 @@ import inspect

 src_file_path = inspect.getfile(lambda: None)

-DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20240917 Hestia Materials.xlsx"
+DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20250316 Domna Materials.xlsx"
 # Environment file is at the same level as this file
 ENV_FILE = Path(src_file_path).parent / "etl" / "costs" / ".env"
 dotenv.load_dotenv(ENV_FILE)
@ -91,6 +91,7 @@ def app():
    lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
    flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
    window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)
+    rir_insulation_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="room_roof_insulation", header=0)

    # Form a single table to be uploaded
    costs = pd.concat(
@ -104,7 +105,8 @@ def app():
            ewi_costs,
            lel_costs,
            flat_roof_costs,
-            window_costs
+            window_costs,
+            rir_insulation_costs,
        ]
    )

--- a/etl/customers/aiha/bid_numbers.py
+++ b/etl/customers/aiha/bid_numbers.py
@ -0,0 +1,106 @@
+"""
+This is an adhoc script, used to pull together some of the figures that are being included in the
+Warm Homes: Social Housing Wave 3 funding application
+"""
+
+import pandas as pd
+import numpy as np
+
+# Load the full AIHA property list and the modelled measures from two sheets of the same workbook.
+# The header row differs per sheet.
+aiha_all_units = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Downloads/AIHA Measures Packages 2024_11_13.xlsx",
+    sheet_name="All Properties - AIHA",
+    header=2
+)
+modelled_units = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Downloads/AIHA Measures Packages 2024_11_13.xlsx",
+    sheet_name="Modelled Properties - Measures",
+    header=5
+)
+# Drop the two unnamed leading columns of the sheet
+aiha_all_units = aiha_all_units.drop(columns=['Unnamed: 0', 'Unnamed: 1'])
+# NOTE(review): aiha_extracted_property_data is loaded but not referenced again in this script
+aiha_extracted_property_data = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv"
+)
+# Units in the Wave 3 bid are those with a positive expected package cost
+aiha_wave_3_units = aiha_all_units[aiha_all_units["Expected Package Cost"].astype(float) > 0]
+# TODO: The EPC C property isn't a C!
+# Ratings recorded as "D or E" are counted as "E" in the breakdown
+aiha_epc_breakdown = aiha_wave_3_units["Expected EPC Rating"].replace({"D or E": "E"}).value_counts()
+# For CAHA
+caha_epc_breakdown = modelled_units[
+    modelled_units['Survey Key'].str.contains("CAHA")
+]['Current EPC Rating'].value_counts()
+# For Hornsey
+hornsey_epc_breakdown = modelled_units[
+    modelled_units['Survey Key'].str.contains("HORNSEY")
+]['Current EPC Rating'].value_counts()
+
+# Original asset data supplied for the bid, used for the archetype features (wall type, property type, etc.)
+aiha_original_asset_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/240924- KSQ & Domna Info Merge - AIHA - SHDF Wave 3 "
+    "bid - Supplementary information.xlsx",
+    sheet_name="Archetyping Data",
+    header=2
+)
+
+# Get the units in the bid:
+# an inner join on the three address columns keeps only asset rows that match a Wave 3 unit
+aiha_wave_3_features = aiha_original_asset_data[
+    ['Address letter or number', 'Street address', 'Postcode', "Wall type",
+     "Property type", "built-form", "floor"]
+].merge(
+    aiha_wave_3_units[['Address letter or number', 'Street address', 'Postcode']],
+    how="inner",
+    on=["Address letter or number", "Street address", "Postcode"]
+)
+
+wall_type_breakdown = aiha_wave_3_features["Wall type"].value_counts()
+property_type_breakdown = aiha_wave_3_features.groupby(["Property type", "floor"]).size().reset_index()
+
+# Bare expression: the result is discarded when run as a script, it is only visible in an interactive session
+aiha_wave_3_features[aiha_wave_3_features["Property type"] == "Flat"][["Street address", "Postcode"]]
+
+# NOTE(review): the rows below look like pasted output with a manual Low/Medium annotation per row -
+# confirm what the annotation represents.
+# 4   Yetev Lev Court   ...  Semi-Detached     mid  - Medium
+# B    86 Bethune Road  ...    Mid-Terrace     top. - Low
+# A    80 Bethune Road  ...    Mid-Terrace  ground. - Low
+# B    80 Bethune Road  ...             \n      \n  - Low
+# A   9 Clapton Common  ...  Semi-Detached  ground. - Low
+# C   9 Clapton Common  ...    End-Terrace      \n. - Low
+# B      89 Manor Road  ...             \n      \n. - Low
+# A  6 Northfield Road  ...       Detached     top. - Low
+# 13 Northfield Rd  ...  Semi-Detached      \n      - Low
+# A      73 Manor Road  ...    End-Terrace      \n  - Low
+# B      73 Manor Road  ...       Detached     top  - Low
+
+# Hornsey data - contained in original asset list
+hornsey_asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
+    "Trust.xlsx",
+    sheet_name="Ksquared-All units information",
+    header=3
+)
+
+# We don't need the first row
+hornsey_asset_list = hornsey_asset_list.iloc[1:]
+# Fill NA values with empty strings
+hornsey_asset_list = hornsey_asset_list.fillna("")
+# Normalise the address columns to stripped strings
+hornsey_asset_list["Address letter or number"] = hornsey_asset_list["Address letter or number"].astype(
+    str
+).str.strip()
+hornsey_asset_list["Postcode"] = hornsey_asset_list["Postcode"].astype(str).str.strip()
+hornsey_asset_list["Street address"] = hornsey_asset_list["Street address"].astype(str).str.strip()
+# Replace double spaces
+for col in ["Address letter or number", "Street address", "Postcode"]:
+    hornsey_asset_list[col] = hornsey_asset_list[col].str.replace("  ", " ")
+
+# Rows without an address letter or number are dropped
+hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""]
+
+# Any wall type that does not contain "Cavity" is classed as "Solid"
+hornsey_asset_list["Wall Type Cleaned"] = np.where(
+    hornsey_asset_list["Wall type"].str.contains("Cavity"),
+    "Cavity",
+    "Solid"
+)
+
+# Bare expression: only useful in an interactive session
+hornsey_asset_list["Property type"].value_counts()
+
+# CAHA
+caha_epc_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_extracted_property_data.xlsx"
+)
+
+# Breakdowns excluding 33 Woodhouse Road (bare expressions, only useful in an interactive session)
+caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["property_type"].value_counts()
+caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["wall_type"].value_counts()
--- a/etl/customers/aiha/xml_extraction.py
+++ b/etl/customers/aiha/xml_extraction.py
@ -0,0 +1,988 @@
+import os
+from io import BytesIO
+
+import pandas as pd
+
+from etl.xml_survey_extraction.XmlParser import XmlParser
+
+# Local folder scanned by main(): each subfolder is searched for XML files and its name is used as the survey key
+SURVEY_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/RESIDENT SURVEYS"
+# NOTE(review): presumably a 26% contingency uplift applied to costs - its usage is not visible here, confirm
+CONTINGENCY_RATE = 0.26
+
+
+def sap_to_epc(sap_points: int | float):
+    """
+    Map a SAP score onto its EPC band letter.
+
+    Bands are defined by their lower bound: A from 92, B from 81, C from 69, D from 55, E from 39, F from 21,
+    and G for anything lower.
+    :param sap_points: numerical value of SAP points, typically between 0 and 100
+    :return: the EPC rating letter, "A" to "G"
+    :raises ValueError: if sap_points is zero or negative
+    """
+
+    if sap_points <= 0:
+        raise ValueError("SAP points should be above 0.")
+
+    # Lower bound of each band, highest first, so the first match is the best band reached
+    band_floors = (
+        (92, "A"),
+        (81, "B"),
+        (69, "C"),
+        (55, "D"),
+        (39, "E"),
+        (21, "F"),
+    )
+    for floor, rating in band_floors:
+        if sap_points >= floor:
+            return rating
+
+    return "G"
+
+
+def main():
+    """
+    This script handles the extraction of data from the XML files in the survey folders.
+    :return:
+    """
+    # Step 1: List all subfolders inside SURVEY_FOLDER_PATH.
+    subfolders = [f.path for f in os.scandir(SURVEY_FOLDER_PATH) if f.is_dir()]
+
+    # Step 2: Loop through each subfolder and find the XML files.
+    extracted_surveys = []
+    for subfolder in subfolders:
+        print(f"Searching in subfolder: {subfolder}")
+
+        # Find all XML files in the current subfolder.
+        xml_files = [f for f in os.listdir(subfolder) if f.endswith('.xml')]
+
+        if not xml_files:
+            print(f"No XML files found in subfolder: {subfolder}")
+            continue
+
+        # If any XML files are found, perform the data extraction. We use the subfolder name as the survey key.
+        for xml_file in xml_files:
+            xml_path = os.path.join(subfolder, xml_file)
+            print(f"Processing XML file: {xml_path}")
+
+            # Read in the XML and parse it using the XmlParser class.
+            with open(xml_path, 'rb') as file:
+                xml_data_io = BytesIO(file.read())
+            uprn = None  # Set the UPRN if available.
+
+            # Create an XmlParser instance
+            xml_parser = XmlParser(
+                file=xml_data_io,
+                filekey=xml_path,
+                surveyor_company="",
+                uprn=uprn,
+            )
+
+            # Run the parser to extract the data
+            xml_parser.run()
+            if not xml_parser.epc:
+                # If we don't have a lig xml
+                continue
+
+            # Store the extracted data for further processing
+            extracted_surveys.append({
+                "survey_key": subfolder.split("/")[-1],
+                **xml_parser.epc,
+                **xml_parser.additional_data
+            })
+
+    print(f"Extracted {len(extracted_surveys)} surveys.")
+    # Process the extracted_surveys as needed, for example, save to a database or write to a file.
+    extracted_surveys = pd.DataFrame(extracted_surveys)
+
+    # This is the data we need for the AIHA project
+    measures_data = extracted_surveys[
+        ["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating",
+         "number_of_floors", "walls-description", "property-type", "built-form"]
+    ]
+    measures_data = measures_data.sort_values("survey_key", ascending=True)
+    measures_data.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv",
+    )
+
+    # Note:
+    # The properties will still have "Very poor" ratings for their hot water
+
+    # TODO
+    #   - AIH001-03 has a loft that is inaccessible - ask Chenai about why this property didn't have access to the loft
+    #     [Can't remember, not clear - Chenai will check]
+    #   - AIH001-08 and AIH001-09, check if it's freehold - could solar work as both of these units are part of the same
+    #     building [Question for Lewis & Kevin]
+    #   - AIH001-09 - Is it not possible to install a loft hatch? [IT IS NOT, NO ACCESS - would need to be accessed
+    #   from the other unit]
+    #   - AIH001-09 - Is there definitely an immersion water heater? Is this definitely the case for the other units?
+    #       [Question for Lewis & Kevin] - [YES - ASHP!!!!]
+
+    # TODO: Check which properties are in a conservation area
+    # TODO: AIH001-16 - Is the loft insulation suitable (already has 100mm in the RIR)
+    # TODO: Adjust Archetype 14 homes to exclude double glazing? Or should we exclude entirely
+
+    recommended_measures = [
+        {
+            "survey_key": "AIH001-01",
+            "starting_sap": 69,
+            "recommended_measures": [],
+            "notes": "Is EPC C"
+        },
+        {
+            "survey_key": "AIH001-02",
+            "starting_sap": 65,
+            "recommended_measures": [
+                {
+                    "measure": "Solar PV",
+                    "description": "2.4kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "2.4W",
+                            "orientation": "Horizontal",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 7,
+                    "ending_sap": 72,
+                    "notes": "The array can be mounted on the flat roof, so that panels are south facing"
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 4,
+                    "ending_sap": 76
+                }
+            ],
+        },
+        {
+            "survey_key": "AIH001-03",
+            "starting_sap": 43,
+            "recommended_measures": [
+                {
+                    "measure": "Cylinder Insulation",
+                    "description": "80mm cylinder insulation",
+                    "sap_points": 1,
+                    "ending_sap": 44,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "4kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "4kWp",
+                            "orientation": "East",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        },
+                    ],
+                    "sap_points": 10,
+                    "ending_sap": 54
+                },
+                {
+                    "measure": "Air Source Heat Pump",
+                    "description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)",
+                    "sap_points": 20,
+                    "ending_sap": 74
+                },
+                {
+                    "measure": "Tariff Review",
+                    "description": "Switch to 24-hour tariff",
+                    "sap_points": 15,
+                    "ending_sap": 89
+                }
+            ],
+            "notes": "Unclear if the loft is accessible"
+        },
+        {
+            "survey_key": "AIH001-04",
+            "starting_sap": 48,
+            "recommended_measures": [
+                {
+                    "measure": "Flat Roof Insulation",
+                    "description": "100mm flat roof insulation",
+                    "floor_area": 39.1482,  # based on area of top floor
+                    "sap_points": 4,
+                    "ending_sap": 52
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 3,
+                    "ending_sap": 55
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "4kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "4kWp",
+                            "orientation": "South",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 15,
+                    "ending_sap": 70
+                }
+            ],
+            "notes": "Roof is flat, PV array should be installed south facing with elevation"
+        },
+        {
+            "survey_key": "AIH001-05",
+            "starting_sap": 54,
+            "recommended_measures": [
+                {
+                    "measure": "Flat Roof Insulation",
+                    "description": "100mm flat roof insulation",
+                    "floor_area": 49.48,  # based on area of top floor
+                    "sap_points": 5,
+                    "ending_sap": 59,
+                },
+                {
+                    "measure": "Cylinder Insulation",
+                    "description": "80mm cylinder insulation",
+                    "sap_points": 2,
+                    "ending_sap": 61,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "4kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "4kW",
+                            "orientation": "Horizontal",
+                            "elavation": 30,
+                            "overshading": "Modest",
+                        }
+                    ],
+                    "sap_points": 9,
+                    "ending_sap": 70
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 3,
+                    "ending_sap": 73
+                }
+            ],
+            "notes": ""
+        },
+        {
+            "survey_key": "AIH001-06",
+            "starting_sap": 62,
+            "recommended_measures": [
+                {
+                    "measure": "Cylinder Insulation",
+                    "description": "80mm cylinder insulation",
+                    "sap_points": 2,
+                    "ending_sap": 64,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "2kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "2kW",
+                            "orientation": "South",
+                            "elavation": 30,
+                            "overshading": "Modest",
+                        }
+                    ],
+                    "sap_points": 6,
+                    "ending_sap": 70
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-07",
+            "starting_sap": 74,
+            "recommended_measures": [],
+            "notes": "Is EPC C"
+        },
+        {
+            "survey_key": "AIH001-08",
+            "starting_sap": 56,
+            "recommended_measures": [
+                {
+                    "measure": "Loft Insulation",
+                    "description": "300mm loft insulation",
+                    "floor_area": 54.2864,  # Based on area of top floor
+                    "sap_points": 2,
+                    "ending_sap": 58,
+                },
+                {
+                    "measure": "Cylinder Insulation",
+                    "description": "80mm cylinder insulation",
+                    "sap_points": 4,
+                    "ending_sap": 62,
+                },
+                {
+                    "measure": "Internal Wall Insulation",
+                    "description": "100mm internal wall insulation",
+                    "hlp": 24.13 * 2.63,
+                    "sap_points": 7,
+                    "ending_sap": 69,
+                },
+                {
+                    "measure": "Ventilation",
+                    "description": "2x DMEV fans",
+                    "sap_points": 0,
+                    "ending_sap": 69,
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-09",
+            "starting_sap": 44,
+            "recommended_measures": [
+                {
+                    "measure": "Internal Wall Insulation",
+                    "description": "100mm internal wall insulation",
+                    "hlp": (22.35 * 3.24) + (22.13 * 2.53),
+                    "sap_points": 8,
+                    "ending_sap": 52,
+                },
+                {
+                    "measure": "Cavity Wall Insulation",
+                    "description": "CWI to rdSAP default standard",
+                    "hlp": (2.68 * 2.39) + (5.93 * 2.63) + (6.13 * 2.39),  # 1st & 2nd extension
+                    "sap_points": 1,
+                    "ending_sap": 53,
+                },
+                {
+                    "measure": "Ventilation",
+                    "description": "2x DMEV fans",
+                    "sap_points": 0,
+                    "ending_sap": 53,
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 3,
+                    "ending_sap": 56,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "1.6kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "1.6W",
+                            "orientation": "South-East",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 6,
+                    "ending_sap": 62
+                },
+                {
+                    "measure": "Loft Insulation",
+                    "description": "300mm loft insulation",
+                    "floor_area": 63.59 + 12.31,  # Based on area of main building and 1st extension
+                    "sap_points": 8,
+                    "ending_sap": 70,
+                    "notes": "Loft is inaccessible from this unit - would need to be accessed from the other unit, "
+                             "which is also owned by AIHA"
+                }
+            ],
+            "notes": "This property is a house split into 2 flats. We can install a PV array for both units (one array"
+                     "per unit). Area on south-east part of roof is ~22m2 with no overshadowing. Flat roof area is 8m2"
+                     "with modest overshadowing. We suggest a 3.2kWp system, across two units"
+        },
+        {
+            "survey_key": "AIH001-11",
+            "starting_sap": 59,
+            "recommended_measures": [
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 4,
+                    "ending_sap": 63,
+                },
+                {
+                    "measure": "Internal Wall Insulation",
+                    "description": "100mm internal wall insulation",
+                    "hlp": (18.50 * 3.12) + (19.00 * 2.75),
+                    "sap_points": 5,
+                    "ending_sap": 68,
+                },
+                {
+                    "measure": "Cylinder Insulation",
+                    "description": "80mm cylinder insulation",
+                    "sap_points": 1,
+                    "ending_sap": 69,
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-12",
+            "starting_sap": 46,
+            "recommended_measures": [
+                {
+                    "measure": "Double Glazing",
+                    "description": "Installation of double glazing",
+                    "n_windows": 20,  # Counted the bay windows each as 3
+                    "windows_area": 10.66,
+                    "sap_points": 3,
+                    "ending_sap": 49,
+                },
+                # {
+                #     "measure": "Solar PV",
+                #     "description": "3.2kWp Solar PV system",
+                #     "config": [
+                #         {
+                #             "size": "3.2W",
+                #             "orientation": "East",
+                #             "elavation": 30,
+                #             "overshading": "Little or none",
+                #         }
+                #     ],
+                #     "sap_points": 9,
+                #     "ending_sap": 58
+                # },
+                {
+                    "measure": "Air Source Heat Pump",
+                    "description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)",
+                    "sap_points": 15,
+                    "ending_sap": 65
+                },
+                {
+                    "measure": "Tariff Review",
+                    "description": "Switch to 24-hour tariff",
+                    "sap_points": 15,
+                    "ending_sap": 80
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-13",
+            "starting_sap": 53,
+            "recommended_measures": [
+                {
+                    "measure": "Roof Insulation",
+                    "description": "100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)",
+                    "floor_area": 39.75,  # based on the floor area of the RIR
+                    "sap_points": 6,
+                    "ending_sap": 59,
+                },
+                {
+                    "measure": "Flat Roof Insulation",
+                    "description": "100mm flat roof insulation",
+                    "floor_area": 33.06,  # Based on area of the extension
+                    "sap_points": 2,
+                    "ending_sap": 61,
+                },
+                {
+                    "measure": "Cavity Wall Insulation",
+                    "description": "CWI to rdSAP default standard",
+                    "hlp": (35.40 * 2.65) + (26.70 * 2.73) + (16.30 * 2.71),  # 1st & 2nd extension
+                    "sap_points": 6,
+                    "ending_sap": 67,
+                },
+                {
+                    "measure": "Ventilation",
+                    "description": "2x DMEV fans",
+                    "sap_points": 0,
+                    "ending_sap": 67,
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 2,
+                    "ending_sap": 69,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "4kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "4kW",
+                            "orientation": "Horizontal",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 9,
+                    "ending_sap": 78
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-14",
+            "starting_sap": 63,
+            "recommended_measures": [
+                {
+                    "measure": "Cavity Wall Insulation",
+                    "description": "CWI to rdSAP default standard",
+                    "hlp": (11.00 * 2.6) + (11.00 * 2.65) + (4.60 * 2.7),
+                    "sap_points": 5,
+                    "ending_sap": 68,
+                },
+                {
+                    "measure": "Ventilation",
+                    "description": "2x DMEV fans",
+                    "sap_points": 0,
+                    "ending_sap": 68,
+                },
+                {
+                    "measure": "Loft Insulation",
+                    "description": "300mm loft insulation",  # Based on area of main building
+                    "floor_area": 59.20,
+                    "sap_points": 1,
+                    "ending_sap": 69,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "3.2kWp Solar PV system",
+                    "sap_points": 10,
+                    "ending_sap": 79,
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-15",
+            "starting_sap": 60,
+            "recommended_measures": [
+                {
+                    "measure": "Loft Insulation",
+                    "description": "300mm loft insulation",
+                    "floor_area": 73.81,  # Based on area of main building
+                    "sap_points": 1,
+                    "ending_sap": 61,
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 3,
+                    "ending_sap": 64,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "3.2kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "3.2W",
+                            "orientation": "North-West",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 7,
+                    "ending_sap": 71,
+                    "notes": "The array is North-west facing and therefore will be slightly less efficient than south"
+                             "facing, however the impact is not so severe as to make the installation not worthwhile."
+                             "Ground mounted"
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-16",
+            "starting_sap": 60,
+            "recommended_measures": [
+                {
+                    "measure": "Cavity Wall Insulation",
+                    "description": "CWI to rdSAP default standard",
+                    "hlp": (21.56 * 2.60) + (26.79 * 2.8) + (6.74 * 2.60),
+                    "sap_points": 4,
+                    "ending_sap": 64,
+                },
+                {
+                    "measure": "Ventilation",
+                    "description": "2x DMEV fans",
+                    "sap_points": 0,
+                    "ending_sap": 64,
+                },
+                {
+                    "measure": "Loft Insulation",
+                    "description": "300mm loft insulation",
+                    "floor_area": 20.92,  # Based on floor area of RIR
+                    "sap_points": 1,
+                    "ending_sap": 65,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "2.4kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "2.4W",
+                            "orientation": "South-East",
+                            "elavation": 30,
+                            "overshading": "Modest",
+                        }
+                    ],
+                    "sap_points": 5,
+                    "ending_sap": 70,
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-17",
+            "starting_sap": 62,
+            "recommended_measures": [
+                {
+                    "measure": "Cylinder Insulation",
+                    "description": "80mm cylinder insulation",
+                    "sap_points": 1,
+                    "ending_sap": 63,
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 3,
+                    "ending_sap": 66,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "4kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "3.2kW",
+                            "orientation": "East",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        },
+                        {
+                            "size": "0.8kW",
+                            "orientation": "West",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 12,
+                    "ending_sap": 78,
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-18",
+            "starting_sap": 58,
+            "recommended_measures": [
+                {
+                    "measure": "Loft Insulation",
+                    "description": "300mm loft insulation",
+                    "floor_area": 37.52,  # Based on area of main building and 1st extension
+                    "sap_points": 7,
+                    "ending_sap": 65,
+                },
+                {
+                    "measure": "Cylinder Insulation",
+                    "description": "80mm cylinder insulation",
+                    "sap_points": 1,
+                    "ending_sap": 66,
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 2,
+                    "ending_sap": 68,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "3.2kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "3.2W",
+                            "orientation": "North-East",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 7,
+                    "ending_sap": 75,
+                }
+            ],
+
+        },
+        {
+            "survey_key": "AIH001-19",
+            "starting_sap": 76,
+            "recommended_measures": []
+        },
+        {
+            "survey_key": "AIH001-20",
+            "starting_sap": 82,
+            "recommended_measures": []
+        },
+        {
+            "survey_key": "AIH001-21",
+            "starting_sap": 53,
+            "recommended_measures": [
+                {
+                    "measure": "Cylinder Insulation",
+                    "description": "80mm cylinder insulation",
+                    "sap_points": 2,
+                    "ending_sap": 55,
+                },
+                {
+                    "measure": "Roof Insulation",
+                    "description": "100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)",
+                    "floor_area": 22.80,  # Based on floor area of RIR
+                    "sap_points": 7,
+                    "ending_sap": 62,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "2.4kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "1.6kWp",
+                            "orientation": "Horizontal",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        },
+                        {
+                            "size": "0.8kWp",
+                            "orientation": "South-East",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 9,
+                    "ending_sap": 71,
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 3,
+                    "ending_sap": 74,
+                }
+            ]
+        },
+        {
+            "survey_key": "AIH001-SIMULATED-01",
+            "elmhurst_reference": "000020",
+            "starting_sap": None,
+            "recommended_measures": [
+                {
+                    "measure": "Internal Wall Insulation",
+                    "description": "100mm internal wall insulation",
+                    "hlp": (22.35 * 3.24) + (22.13 * 2.53),
+                    "sap_points": 8,
+                    "ending_sap": 52,
+                },
+                {
+                    "measure": "Cavity Wall Insulation",
+                    "description": "CWI to rdSAP default standard",
+                    "hlp": (2.68 * 2.39) + (5.93 * 2.63) + (6.13 * 2.39),  # 1st & 2nd extension
+                    "sap_points": 1,
+                    "ending_sap": 53,
+                },
+                {
+                    "measure": "Ventilation",
+                    "description": "2x DMEV fans",
+                    "sap_points": 0,
+                    "ending_sap": 53,
+                },
+                {
+                    "measure": "TTZC",
+                    "description": "Smart Thermostat",
+                    "sap_points": 3,
+                    "ending_sap": 56,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "1.6kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "1.6W",
+                            "orientation": "South-East",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 6,
+                    "ending_sap": 62
+                },
+                {
+                    "measure": "Loft Insulation",
+                    "description": "300mm loft insulation",
+                    "floor_area": 63.59 + 12.31,  # Based on area of main building and 1st extension
+                    "sap_points": 8,
+                    "ending_sap": 70,
+                    "notes": "Loft is inaccessible from this unit - would need to be accessed from the other unit, "
+                             "which is also owned by AIHA"
+                }
+            ],
+            "notes": "This was cloned from 80A. There is no existing data for 80B"
+        },
+        {
+            "survey_key": "AIH001-SIMULATED-05",
+            "starting_sap": 68,
+            "recommended_measures": [
+                {
+                    "measure": "Loft Insulation",
+                    "description": "300mm loft insulation",
+                    "floor_area": 42.5,
+                    "sap_points": 1,
+                    "ending_sap": 69,
+                },
+                {
+                    "measure": "Solar PV",
+                    "description": "3.2kWp Solar PV system",
+                    "config": [
+                        {
+                            "size": "3.2W",
+                            "orientation": "North-East",
+                            "elavation": 30,
+                            "overshading": "None or little",
+                        }
+                    ],
+                    "sap_points": 8,
+                    "ending_sap": 77,
+                }
+            ]
+        }
+    ]
+
+    scaffolding_data = [
+        {
+            "number_of_floors": 2,
+            "price": 841,
+        },
+        {
+            "number_of_floors": 3,
+            "price": 1077,
+        }
+    ]
+
+    # TODO - Need an updated cost for cylinder insulation
+    pricing_data = [
+        {'item': '80mm cylinder insulation', 'unit_price': 50, 'unit': 'unit'},
+        {'item': '100mm internal wall insulation', 'unit_price': 244.8, 'unit': 'hlp_m2'},
+        {'item': 'CWI to rdSAP default standard', 'unit_price': 14.21, 'unit': 'hlp_m2'},
+        {'item': 'Window draught proofing improvements', 'unit_price': 63, 'unit': 'window'},
+        {'item': '100mm flat roof insulation', 'unit_price': 195, 'unit': 'floor_m2'},
+        {'item': 'Switch to 24-hour tariff', 'unit_price': 0, 'unit': None},
+        {'item': 'Installation of double glazing', 'unit_price': 1074, 'unit': 'window'},
+        {'item': 'Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)', 'unit_price': 21189 + 1200,
+         'unit': 'unit'},
+        {'item': '100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)', 'unit_price': 244.80,
+         'unit': 'floor_m2'},
+        {'item': '300mm loft insulation', 'unit_price': 16.07, 'unit': 'floor_m2'},
+        {'item': 'Smart Thermostat', 'unit_price': 1200, 'unit': 'unit'},
+        {'item': '2x DMEV fans', 'unit_price': 1070, 'unit': 'unit'},
+        {'item': '1.6kWp Solar PV system', 'unit_price': 3040, 'unit': 'unit_needs_scaffolding'},
+        {'item': '2kWp Solar PV system', 'unit_price': 3201, 'unit': 'unit_needs_scaffolding'},
+        {'item': '2.4kWp Solar PV system', 'unit_price': 3363, 'unit': 'unit_needs_scaffolding'},
+        {'item': '3.2kWp Solar PV system', 'unit_price': 3686, 'unit': 'unit_needs_scaffolding'},
+        {'item': '4kWp Solar PV system', 'unit_price': 4009, 'unit': 'unit_needs_scaffolding'},
+        {'item': '5.6kWp Solar PV system', 'unit_price': 5015, 'unit': 'unit_needs_scaffolding'},
+    ]
+    pricing_data = pd.DataFrame(pricing_data)
+
+    for recommendation in recommended_measures:
+        property_data = measures_data[measures_data["survey_key"] == recommendation["survey_key"]].squeeze()
+        total_cost = 0
+
+        for measure in recommendation["recommended_measures"]:
+            measure_pricing = pricing_data[pricing_data["item"] == measure["description"]]
+            measure_unit = measure_pricing["unit"].values[0]
+
+            if measure_unit in ["unit", None]:
+                measure_cost = float(measure_pricing["unit_price"].values[0])
+            elif measure_unit == "unit_needs_scaffolding":
+                n_floors = property_data["number_of_floors"]
+                scaffolding_cost = [x for x in scaffolding_data if x["number_of_floors"] == n_floors][0]["price"]
+                measure_cost = float(measure_pricing["unit_price"].values[0]) + scaffolding_cost
+            elif measure_unit == "floor_m2":
+                measure_cost = float(measure_pricing["unit_price"].values[0]) * measure["floor_area"]
+            elif measure_unit == "hlp_m2":
+                measure_cost = float(measure_pricing["unit_price"].values[0]) * measure["hlp"]
+            elif measure_unit == "window":
+                measure_cost = float(measure_pricing["unit_price"].values[0]) * measure["n_windows"]
+            else:
+                raise Exception("Unknown unit type")
+
+            measure["Total Cost"] = measure_cost
+            total_cost += measure_cost
+
+        recommendation["total_cost"] = total_cost
+
+    # Step 1: Normalize the recommended_measures data into a DataFrame.
+    normalized_measures = []
+    for survey in recommended_measures:
+        survey_key = survey["survey_key"]
+        starting_sap = survey["starting_sap"]
+        total_cost = survey.get("total_cost", 0)
+
+        for measure in survey.get("recommended_measures", []):
+            # Include hlp and floor_area for each measure if available
+            hlp = measure.get("hlp", None)
+            floor_area = measure.get("floor_area", None)
+
+            normalized_measures.append({
+                "survey_key": survey_key,
+                "hlp": hlp,
+                "floor_area": floor_area,
+                "starting_sap": starting_sap,
+                "measure": measure["measure"],
+                "description": measure.get("description", ""),
+                "sap_points": measure.get("sap_points", 0),
+                "measure_cost": measure.get("Total Cost", 0),
+                "total_cost": total_cost
+            })
+
+    # Convert the normalized list into a DataFrame.
+    measures_df = pd.DataFrame(normalized_measures)
+
+    # Step 2: Pivot the measures_df to have a column for each measure type, using the description as values.
+    pivoted_measures = measures_df.pivot_table(
+        index="survey_key",
+        columns="measure",
+        values="description",
+        aggfunc=lambda x: ' '.join(x),  # Concatenate descriptions if there are multiple entries.
+        fill_value=None
+    ).reset_index()
+
+    measures_columns = [x for x in pivoted_measures.columns if x not in ["survey_key"]]
+    # We add a "Cost of" column for each measure
+    for measure in measures_columns:
+        pivoted_measures[f"Cost of {measure}"] = None
+
+    pivoted_floor_area = measures_df.pivot_table(
+        index="survey_key",
+        columns="measure",
+        values="floor_area",
+        aggfunc="first"  # Use 'first' since each measure should only appear once per survey_key
+    ).add_prefix("floor_area - ").reset_index()
+
+    pivoted_hlp = measures_df.pivot_table(
+        index="survey_key",
+        columns="measure",
+        values="hlp",
+        aggfunc="first"
+    ).add_prefix("hlp - ").reset_index()
+
+    # Merge hlp and floor_area data
+    pivoted_measures = pivoted_measures.merge(pivoted_hlp, on="survey_key", how="left")
+    pivoted_measures = pivoted_measures.merge(pivoted_floor_area, on="survey_key", how="left")
+
+    # Step 3: Calculate the total SAP points for each survey.
+    totals = measures_df.groupby("survey_key").agg(
+        total_sap_points=("sap_points", "sum"),
+    ).reset_index()
+
+    # Merge total sap points into the pivoted measures.
+    pivoted_measures = pd.merge(pivoted_measures, totals, on="survey_key", how="left")
+    # pivoted_measures["Cost Contingency"] = pivoted_measures["total_cost_of_measures"] * CONTINGENCY_RATE
+    # pivoted_measures["Total Cost"] = pivoted_measures["total_cost_of_measures"] + pivoted_measures["Cost Contingency"]
+
+    # Step 4: Extract starting SAP for each survey key.
+    starting_sap_df = measures_df.drop_duplicates(subset=["survey_key"])[["survey_key", "starting_sap"]]
+
+    # Merge starting SAP back onto pivoted measures.
+    result_df = pd.merge(pivoted_measures, starting_sap_df, on="survey_key", how="left")
+
+    # Step 5: Calculate the ending SAP.
+    result_df["Ending SAP"] = result_df["starting_sap"] + result_df["total_sap_points"]
+    result_df["Ending EPC Rating"] = result_df["Ending SAP"].apply(sap_to_epc)
+
+    # Step 6: Merge the result with the measures_data to get the final DataFrame.
+    final_measures = measures_data.merge(
+        result_df, how="left", on="survey_key"
+    )
+
+    final_measures.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Measures packages.csv")
+
+    # Store costs
+    pricing_data.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Pricing data.csv")
+
+# if __name__ == "__main__":
+#     main()
--- a/etl/customers/benyon/epc_data.py
+++ b/etl/customers/benyon/epc_data.py
@ -0,0 +1,71 @@
+"""
+Rough script to get the EPC data for Benyon
+"""
+
+import pandas as pd
+import os
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from asset_list.utils import get_data
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Benyon Estate/List of All Properties ecl Grd Rents in "
+    "Alphabetical Order.xlsx",
+    header=1
+)
+asset_list.columns = ["tennancy", "landlord_id", "landlord_address"]
+# Get postcode as the last 2 parts of the address, split on space
+asset_list["postcode"] = asset_list["landlord_address"].apply(lambda x: x.split(" ")[-2] + " " + x.split(" ")[-1])
+
+asset_list["house_no"] = asset_list.apply(
+    lambda x: SearchEpc.get_house_number(address=x["landlord_address"], postcode=x["postcode"]), axis=1
+)
+
+epc_data, errors, no_epc = get_data(
+    df=asset_list,
+    manual_uprn_map={},
+    epc_auth_token=EPC_AUTH_TOKEN,
+    uprn_column=None,
+    fulladdress_column="landlord_address",
+    address1_column="house_no",
+    postcode_column="postcode",
+    property_type_column=None,
+    built_form_column=None,
+    epc_api_only=True,
+    row_id_name="landlord_id",
+)
+
+df = asset_list[asset_list["landlord_id"].isin(no_epc)]
+epc_df = pd.DataFrame(epc_data)
+epc_df["current-energy-rating"].value_counts()
+epc_df["property-type"].value_counts()
+epc_df["walls-description"].value_counts(normalize=True)
+
+asset_list = asset_list.merge(
+    epc_df[
+        [
+            "landlord_id", "current-energy-rating", "property-type", "total-floor-area", "roof-description",
+            "walls-description", "co2-emissions-current"
+        ]
+    ],
+    how="left",
+    left_on="landlord_id",
+    right_on="landlord_id"
+)
+asset_list.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Benyon Estate/asset_list.csv", index=False
+)
+
+asset_list_big = asset_list.merge(
+    epc_df,
+    how="left",
+    left_on="landlord_id",
+    right_on="landlord_id"
+)
+asset_list_big.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Benyon Estate/asset_list_full_data.csv",
+    index=False
+)
--- a/etl/customers/bromford/data_cleanup.py
+++ b/etl/customers/bromford/data_cleanup.py
@ -0,0 +1,192 @@
+"""
+12th April 2025
+This script attempts to clean up the various pieces of data we have for Bromford, with the intention of producing a
+standardised asset list
+"""
+
+import pandas as pd
+
+# Step 1
+# The inspections data is spread across three different files. We attempt to produce one finalised asset list, with
+# comprehensive inspections
+
+# Primary asset list
+asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford Asset "
+    "List.xlsx",
+    sheet_name="Asset List"
+)
+
+# First inspections file (MDS); heating type and heating fuel are combined into a single description
+inspections_1 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
+    "MDS.xlsx",
+    sheet_name="Data list"
+)
+inspections_1["Heating Type"] = (inspections_1["Heating Type"] + " " + inspections_1["Heating fuel"]).str.strip()
+
+inspections_2 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
+    "MERLIN LANE.xlsx",
+    sheet_name="Report"
+)
+inspections_2["AssetTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[-1]
+inspections_2["PropTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[:-1].str.join(" ")
+
+inspections_3 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
+    "SEVERN VALE - KLARKE.xlsx",
+    sheet_name="Asset report"
+)
+
+inspections_3["FullAddress"] = inspections_3["T1_Address1"] + ", " + inspections_3["T1_Address2"]
+
+# On inspections 3, we have multiple sheets which describe the heating
+heating_systems = []
+for sheet_name in [
+    "Storage Heaters", "No Heating", "Underfloor Heating", "Rointe Electric Heating", "Air Source Heating",
+    "Gas Central Heating", "Electric Boiler", "Oil Fired Central Heating",
+    "Communal Boilers", "Panel Heaters"
+]:
+    df = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
+        "Rebuild/Inspections/BROMFORD "
+        "SEVERN VALE - KLARKE.xlsx",
+        sheet_name=sheet_name
+    )
+    df = df[["UPRN"]]
+    df["Heating Type"] = sheet_name
+    heating_systems.append(df)
+
+heating_systems = pd.concat(heating_systems)
+# Some UPRNs appear more than once across the heating sheets; we can't tell which entry is correct, so keep the first
+heating_systems = heating_systems.drop_duplicates("UPRN")
+heating_systems = heating_systems.rename(columns={"UPRN": "Asset"})
+heating_systems["Asset"] = heating_systems["Asset"].astype(int)
+
+inspections_3 = inspections_3.merge(heating_systems, how="left", on="Asset")
+
+# Create a consolidated inspections sheet
+inspections = pd.concat(
+    [
+        inspections_1[["Asset", "Construction type", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
+        inspections_2[["Asset", "Construction type", "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
+        inspections_3[["Asset", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
+    ]
+)
+
+inspections_address_data = pd.concat(
+    [
+        inspections_1[
+            ["Asset", "FullAddress", "PostCode", "ConYear", "Beds", "AssetTypeDesc", "PropTypeDesc", 'ManAreaDesc', ]
+        ],
+        inspections_2[
+            ['Asset', 'FullAddress', 'AccomType', "AssetTypeDesc", "PropTypeDesc", 'ConYear', 'Postcode']
+        ].rename(columns={"Postcode": "PostCode"}),
+        inspections_3[
+            ['Asset', "FullAddress", 'T1_Postcode', 'T1_Build Year', 'T1_AssetType']
+        ].rename(
+            columns={"T1_Postcode": "PostCode", "T1_Build Year": "ConYear", "T1_AssetType": "AssetTypeDesc"}
+        ),
+    ]
+)
+
+# Remove some error values
+inspections = inspections[~inspections["Asset"].isin(
+    [
+        "They're all green partial fill they're all green this",
+        "South Staffordshire District Council",
+        'Blk Milton Crt F9-10, Perton, Wolverhampton'
+    ]
+)]
+
+inspections["Asset"] = inspections["Asset"].astype(str)
+asset_list["Asset"] = asset_list["Asset"].astype(str)
+inspections_address_data["Asset"] = inspections_address_data["Asset"].astype(str)
+inspections['WFT Findings'] = inspections['WFT Findings'].replace(r'^\s*$', pd.NA, regex=True)
+
+# We have some cases where the inspections data has dupes on Asset (the ID column). We take the instance that is
+# populated
+inspections = inspections.sort_values(by='WFT Findings', na_position='last')
+inspections = inspections.drop_duplicates(subset='Asset', keep='first')
+
+# We have dupes in the asset list
+asset_list = asset_list.drop_duplicates("Asset")
+
+# Find inspected assets that are missing from the asset list
+missed_asset_ids = inspections[
+    ~inspections["Asset"].isin(asset_list["Asset"].values)
+]["Asset"].values
+
+missed_assets = inspections_address_data[
+    inspections_address_data["Asset"].isin(missed_asset_ids)
+]
+missed_assets = missed_assets.drop_duplicates("Asset")
+
+# We produce a larger asset list
+asset_list = pd.concat([asset_list, missed_assets])
+
+asset_list = asset_list.merge(
+    inspections, how="left", on="Asset"
+)
+asset_list["WFT Findings"] = asset_list["WFT Findings"].fillna("No Inspections Note")
+
+# Store
+# asset_list.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
+#     "data/asset_list.xlsx"
+# )
+
+# We now prepare outcomes into a single file
+pv_outcomes = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford PV "
+    "Outcomes.csv",
+    encoding='cp1252'
+)
+pv_outcomes["measure_type"] = "solar"
+
+other_outcomes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/(Bromford) "
+    "15.04.2024.xlsx",
+    sheet_name="ECO4 & GBIS",
+    header=1
+)
+other_outcomes["measure_type"] = "cwi"
+
+combined_outcomes = pd.concat(
+    [
+        other_outcomes[["NO", "ADDRESS", "POSTCODE", "WEEK COMMENCING", "OUTCOMES", "NOTES"]].rename(
+            columns={
+                "NO": "No", "ADDRESS": "Address", "POSTCODE": "Postcode", "WEEK COMMENCING": "Week Commencing",
+                "OUTCOMES": "Outcome", "NOTES": "Notes"
+            }
+        ),
+        pv_outcomes[['No', 'Address', 'Postcode', "Week Commencing", "Outcome", "Notes"]]
+    ]
+)
+
+# Store
+# combined_outcomes.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
+#     "data/outcomes.xlsx"
+# )
+
+# Submissions sheets - ECO 3 and ECO 4
+eco3_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 Submissions.csv",
+    encoding='cp1252'
+)
+# Get rid of the unnamed columns
+unnamed_columns = [c for c in eco3_submissions.columns if "Unnamed: " in c]
+eco3_submissions = eco3_submissions.drop(columns=unnamed_columns)
+# Store
+eco3_submissions.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 submissions.csv",
+    index=False
+)
+
+eco4_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 4 submissions.csv",
+)
+
+same_cols = [c for c in eco4_submissions.columns if c in eco3_submissions.columns]
--- a/etl/customers/cambridge/remote_assessment.py
+++ b/etl/customers/cambridge/remote_assessment.py
@ -0,0 +1,138 @@
+import os
+import time
+
+from tqdm import tqdm
+import pandas as pd
+from dotenv import load_dotenv
+from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
+from backend.SearchEpc import SearchEpc
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+USER_ID = 8
+PORTFOLIO_ID = 122
+
+
+def app():
+    asset_list = [
+        {
+            "address": "12 Church Lane", "postcode": "CB23 8AF", "uprn": 100090136018,
+            "property_type": "House", "built-form": "Semi-Detached"
+        },
+        {
+            "address": "21 High Street", "postcode": "CB23 8AB", "uprn": 100090144815
+        },
+        {
+            "address": "22 High Street", "postcode": "CB23 8AB", "uprn": 100090144816
+        },
+        {
+            "address": "5 Bunkers Hill", "postcode": "CB3 0LY", "uprn": 10008078615
+        },
+        {
+            "address": "6 Bunkers Hill", "postcode": "CB3 0LY", "uprn": 10008078616
+        },
+        {
+            "address": "7 Bunkers Hill", "postcode": "CB3 0LY", "uprn": 10008078617
+        },
+        {
+            "address": "32 George Nuttall Close", "postcode": "CB4 1YE", "uprn": 200004200075
+        },
+        {
+            "address": "33 George Nuttall Close", "postcode": "CB4 1YE", "uprn": 200004200076
+        },
+        {
+            "address": "35 George Nuttall Close", "postcode": "CB4 1YE", "uprn": 200004200078
+        },
+        {
+            "address": "36 George Nuttall Close", "postcode": "CB4 1YE", "uprn": 200004200079
+        }
+    ]
+    asset_list = pd.DataFrame(asset_list)
+
+    valuations_data = [
+        {'uprn': 100090136018, "valuation": 586_000},
+        {'uprn': 100090144815, "valuation": 446_000},
+        {'uprn': 100090144816, "valuation": 448_000},
+        {'uprn': 10008078615, "valuation": 763_000},
+        {'uprn': 10008078616, "valuation": 616_000},
+        {'uprn': 10008078617, "valuation": 593_000},
+        {'uprn': 200004200075, "valuation": 450_000},
+        {'uprn': 200004200076, "valuation": 457_000},
+        {'uprn': 200004200078, "valuation": 304_000},
+        {'uprn': 200004200079, "valuation": 313_000}
+    ]
+
+    # Pull the additional data
+    extracted_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        add1 = home["address"]
+        pc = home["postcode"]
+        # Retrieve the EPC data
+        epc_searcher = SearchEpc(
+            address1=add1,
+            postcode=pc, uprn=home["uprn"], auth_token=EPC_AUTH_TOKEN, os_api_key=""
+        )
+        epc_searcher.find_property(skip_os=True)
+        if epc_searcher.newest_epc is None:
+            continue
+
+        find_epc_searcher = RetrieveFindMyEpc(address=epc_searcher.newest_epc["address1"],
+                                              postcode=epc_searcher.newest_epc["postcode"])
+        find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+        time.sleep(0.5)
+        # We need uprn
+
+        extracted_data.append(
+            {
+                "uprn": home["uprn"],
+                **find_epc_data,
+            }
+        )
+
+    non_invasive_recommendations = [
+        {
+            "uprn": r["uprn"],
+            "recommendations": r["recommendations"]
+        } for r in extracted_data
+    ]
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(asset_list),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store the non-invasive recommendations in s3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # Store the valuations data in s3
+    valuations_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(valuations_data),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=valuations_filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "valuation_file_path": valuations_filename,
+        "scenario_name": "Wave 3 Packages",
+        "multi_plan": True,
+        "budget": None,
+        "exclusions": []
+    }
+    print(body)
--- a/etl/customers/connells/pilot_remote_assessments.py
+++ b/etl/customers/connells/pilot_remote_assessments.py
@ -0,0 +1,108 @@
+import os
+import time
+
+from tqdm import tqdm
+import pandas as pd
+from dotenv import load_dotenv
+from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
+from backend.SearchEpc import SearchEpc
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+USER_ID = 8
+PORTFOLIO_ID = 123
+
+
+def app():
+    asset_list = [
+        {"address": "1 Raven Crescent", "postcode": "WV11 2EX", "uprn": 100071188496},
+
+        {"address": "13 Bayliss Avenue", "postcode": "WV11 2EX", "uprn": 100071136271},
+
+        {"address": "30 Southbourne Road", "postcode": "WV10 6ET", "uprn": 100071194376},
+
+        {"address": "96 Marsh Lane", "postcode": "WV10 6RX", "uprn": 100071176297},
+    ]
+    asset_list = pd.DataFrame(asset_list)
+
+    valuations_data = [
+        {'uprn': 100071188496, "valuation": 175_000},
+        {'uprn': 100071136271, "valuation": 183_000},
+        {'uprn': 100071194376, "valuation": 221_000},
+        {'uprn': 100071176297, "valuation": 208_000},
+    ]
+
+    # Pull the additional data
+    extracted_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        add1 = home["address"]
+        pc = home["postcode"]
+        # Retrieve the EPC data
+        epc_searcher = SearchEpc(
+            address1=add1,
+            postcode=pc, uprn=home["uprn"], auth_token=EPC_AUTH_TOKEN, os_api_key=""
+        )
+        epc_searcher.find_property(skip_os=True)
+        if epc_searcher.newest_epc is None:
+            continue
+
+        find_epc_searcher = RetrieveFindMyEpc(address=epc_searcher.newest_epc["address1"],
+                                              postcode=epc_searcher.newest_epc["postcode"])
+        find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+        time.sleep(0.5)
+        # We need uprn
+
+        extracted_data.append(
+            {
+                "uprn": home["uprn"],
+                **find_epc_data,
+            }
+        )
+
+    non_invasive_recommendations = [
+        {
+            "uprn": r["uprn"],
+            "recommendations": r["recommendations"]
+        } for r in extracted_data
+    ]
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(asset_list),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store the non-invasive recommendations in s3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # Store the valuations data in s3
+    valuations_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(valuations_data),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=valuations_filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "valuation_file_path": valuations_filename,
+        "scenario_name": "Wave 3 Packages",
+        "multi_plan": True,
+        "budget": None,
+        "exclusions": []
+    }
+    print(body)
--- a/etl/customers/cottons/parse_pdf_asset_list.py
+++ b/etl/customers/cottons/parse_pdf_asset_list.py
@ -0,0 +1,64 @@
+import re
+import pandas as pd
+from PyPDF2 import PdfReader
+
+# Paths to the uploaded files
+file_paths = [
+    "/Users/khalimconn-kowlessar/Downloads/Managed Properties List (dragged).pdf",
+    "/Users/khalimconn-kowlessar/Downloads/Managed Properties List (dragged) 2.pdf",
+    "/Users/khalimconn-kowlessar/Downloads/Managed Properties List (dragged) 3.pdf",
+    "/Users/khalimconn-kowlessar/Downloads/Managed Properties List (dragged) 4.pdf",
+    "/Users/khalimconn-kowlessar/Downloads/Managed Properties List (dragged) 5.pdf",
+    "/Users/khalimconn-kowlessar/Downloads/Managed Properties List (dragged) 6.pdf"
+]
+
+
+# Function to extract text from PDFs
+def extract_text_from_pdf_with_pypdf2(file_path):
+    text = ""
+    reader = PdfReader(file_path)
+    for page in reader.pages:
+        text += page.extract_text()
+    return text
+
+
+# Initialize a list to hold all parsed data
+all_parsed_data = []
+
+# Process each PDF individually
+for i, path in enumerate(file_paths):
+    # Extract text from the PDF
+    extracted_text = extract_text_from_pdf_with_pypdf2(path)
+
+    # Step 1: Remove titles and repeated headers
+    cleaned_text = re.sub(r"Managed Property Report as at \d+ \w+ \d+", "", extracted_text)
+    cleaned_text = re.sub(r"Code Property Address Management Type", "", cleaned_text)
+
+    # Step 2: Extract rows ending with "Managed"
+    rows = re.findall(r".*?Managed", cleaned_text)
+
+    # Step 3: Parse rows into structured data
+    parsed_data = []
+    for row in rows:
+        match = re.match(r"(\S+)\s+(.+?)\s+Managed", row.strip())
+        if match:
+            code = match.group(1).strip()
+            address = match.group(2).strip()
+            parsed_data.append((code, address, "Managed"))
+
+    # Append parsed data to the global list
+    all_parsed_data.extend(parsed_data)
+
+    # Provide feedback for debugging
+    print(f"File {i + 1} processed: {len(parsed_data)} rows")
+
+# Step 4: Create a unified DataFrame
+final_df = pd.DataFrame(all_parsed_data, columns=["Code", "Property Address", "Management Type"])
+
+# Step 5: Save the unified DataFrame to an Excel file
+final_output_file_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unified_Managed_Properties_List.xlsx"
+final_df.to_excel(final_output_file_path, index=False)
+
+# Provide feedback
+print(f"All files processed and combined. Total rows: {len(final_df)}")
+print(f"Unified file saved to: {final_output_file_path}")
--- a/etl/customers/cottons/prep_asset_list.py
+++ b/etl/customers/cottons/prep_asset_list.py
@ -0,0 +1,15 @@
+import pandas as pd
+
+df = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cottons/Cottons Asset List.xlsx"
+)
+
+# split up the address on commas. First section is address1, last seciton is postcode
+df["address1"] = df["Property Address"].apply(lambda x: x.split(",")[0].strip())
+df["postcode"] = df["Property Address"].apply(lambda x: x.split(",")[-1].strip())
+
+# Re-save
+df.to_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cottons/Cottons Asset List.xlsx",
+    index=False,
+)
--- a/etl/customers/cottons/remote_assessments.py
+++ b/etl/customers/cottons/remote_assessments.py
@ -0,0 +1,124 @@
+import os
+import time
+
+from tqdm import tqdm
+import pandas as pd
+from dotenv import load_dotenv
+from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
+from backend.SearchEpc import SearchEpc
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+USER_ID = 8
+PORTFOLIO_ID = 121
+
+
+def app():
+    """
+    Prepares the inputs to produce the remote assessments for Cottons
+    :return:
+    """
+
+    # Read in the asset list
+    cottons_asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cottons/Cottons Asset List EPC Data Pull with "
+        "valuations.xlsx"
+    )
+    # A number are missing EPCs due to the space in the postcode
+    # Breakdowns:
+    # C    119
+    # D     106
+    # E     26
+    # B      5
+    #
+    # Take the EPC D/E properties
+    asset_list = cottons_asset_list[
+        cottons_asset_list["EPC rating on register"].isin(["D", "E"])
+    ]
+    asset_list = asset_list.reset_index(drop=True)
+    asset_list["row_id"] = asset_list.index
+    asset_list["uprn"] = asset_list["uprn"].astype(int)
+
+    extracted_data = []
+    model_asset_list = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        add1 = home["address1"]
+        pc = home["postcode"]
+        # Retrieve the EPC data
+        epc_searcher = SearchEpc(
+            address1=add1,
+            postcode=pc, uprn=home["uprn"], auth_token=EPC_AUTH_TOKEN, os_api_key=""
+        )
+        epc_searcher.find_property(skip_os=True)
+
+        find_epc_searcher = RetrieveFindMyEpc(address=epc_searcher.newest_epc["address1"],
+                                              postcode=epc_searcher.newest_epc["postcode"])
+        find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+        time.sleep(0.5)
+        # We need uprn
+
+        extracted_data.append(
+            {
+                "uprn": home["uprn"],
+                **find_epc_data,
+            }
+        )
+
+        model_asset_list.append(
+            {
+                "uprn": home["uprn"],
+                "address": epc_searcher.newest_epc["address1"],
+                "postcode": epc_searcher.newest_epc["postcode"],
+            }
+        )
+
+    non_invasive_recommendations = [
+        {
+            "uprn": r["uprn"],
+            "recommendations": r["recommendations"]
+        } for r in extracted_data
+    ]
+
+    valuations_data = asset_list[["uprn", "Zoopla Valuation"]].copy().rename(columns={"Zoopla Valuation": "valuation"})
+    valuations_data = valuations_data[~pd.isnull(valuations_data["valuation"])]
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(model_asset_list),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store the non-invasive recommendations in s3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # Store the valuations data in s3
+    valuations_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuations.csv"
+    save_csv_to_s3(
+        dataframe=valuations_data,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=valuations_filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increasing EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "valuation_file_path": valuations_filename,
+        "scenario_name": "Wave 3 Packages",
+        "multi_plan": True,
+        "budget": None,
+        "exclusions": ['air_source_heat_pump', 'boiler_upgrade', 'floor_insulation']
+    }
+    print(body)
--- a/etl/customers/gla/hug_postcodes.py
+++ b/etl/customers/gla/hug_postcodes.py
@ -0,0 +1,77 @@
+import inspect
+import pandas as pd
+from pathlib import Path
+from tqdm import tqdm
+from etl.epc.settings import EARLIEST_EPC_DATE
+from etl.spatial.OpenUprnClient import OpenUprnClient
+
+src_file_path = inspect.getfile(lambda: None)
+
+EPC_DIRECTORY = Path("/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates")
+epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+
+aggregation = []
+for directory in tqdm(epc_directories):
+    data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+    # Rename the columns to the same format as the api returns
+    data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
+    data = data[data["posttown"].str.contains("London", case=False, na=False)]
+    if data.empty:
+        continue
+    # Keep only the EPCs lodged on or after the date threshold
+    data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+    data = data[~pd.isnull(data["uprn"])]
+    data["uprn"] = data["uprn"].astype(int)
+    # Take just the newest EPC per uprn, based on lodgement-date
+    data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
+    # Take EPC D and below
+    data = data[data["current-energy-rating"].isin(["D", "E", "F", "G"])]
+    data["postal_region"] = data["postcode"].str.split(" ").str[0]
+
+    # Take homes that don't have a gas boiler
+    off_gas = data[~data["main-fuel"].str.contains("mains gas", case=False, na=False)]
+
+    if off_gas.empty:
+        continue
+
+    # Remove properties with conservation area issues
+    uprns = off_gas["uprn"].unique()
+    # Get data
+    ca_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev")
+    off_gas = off_gas.merge(
+        ca_data[["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]].rename(
+            columns={"UPRN": "uprn"}
+        ),
+        how="left",
+        on="uprn",
+    )
+    # Remove any restricted units
+    off_gas = off_gas[
+        (off_gas["conservation_status"] != True)
+        & (off_gas["is_listed_building"] != True)
+        & (off_gas["is_heritage_building"] != True)
+        ]
+
+    off_gas = off_gas[
+        off_gas["tenure"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
+    ]
+
+    region_summary = off_gas.groupby("postal_region").size().reset_index(name="count")
+
+    aggregation.append(region_summary)
+
+postal_region_aggregation = pd.concat(aggregation)
+# Re-aggregate
+postal_region_aggregation = postal_region_aggregation.groupby("postal_region")["count"].sum().reset_index()
+
+postal_region_aggregation = postal_region_aggregation.sort_values("count", ascending=False)
+postal_region_aggregation = postal_region_aggregation.rename(
+    columns={"postal_region": "Postcode Region", "count": "Number of Homes"}
+)
+postal_region_aggregation.to_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/GLA/Off Gas EPC D-G Postal Regions - without conservation "
+    "area.xlsx",
+    index=False
+)
--- a/etl/customers/ksquared/Wave3
+++ b/etl/customers/ksquared/Wave3
@ -0,0 +1,425 @@
+import os
+import time
+import re
+
+from etl.epc.settings import EARLIEST_EPC_DATE
+from dotenv import load_dotenv
+from tqdm import tqdm
+import pandas as pd
+import numpy as np
+from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
+from etl.spatial.OpenUprnClient import OpenUprnClient
+from backend.SearchEpc import SearchEpc
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+USER_ID = 8
+PORTFOLIO_ID = 117
+CAHA_PORTFOLIO_ID = 118
+
+
+def hornsey():
+    """
+    This script prepares the asset lists for the additional housing associations, CAHA and Hornsey Housing Trust,
+    that are forming a consortium led by AIHA
+    :return:
+    """
+
+    hornsey_asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
+        "Trust.xlsx",
+        sheet_name="Ksquared-All units information",
+        header=3
+    )
+
+    # We don't need the first row
+    hornsey_asset_list = hornsey_asset_list.iloc[1:]
+    # Fill NA values with empty strings
+    hornsey_asset_list = hornsey_asset_list.fillna("")
+    hornsey_asset_list["Address letter or number"] = hornsey_asset_list["Address letter or number"].astype(
+        str
+    ).str.strip()
+    hornsey_asset_list["Postcode"] = hornsey_asset_list["Postcode"].astype(str).str.strip()
+    hornsey_asset_list["Street address"] = hornsey_asset_list["Street address"].astype(str).str.strip()
+    # Replace double spaces
+    for col in ["Address letter or number", "Street address", "Postcode"]:
+        hornsey_asset_list[col] = hornsey_asset_list[col].str.replace("  ", " ")
+
+    hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""]
+
+    hornsey_asset_list["Wall Type Cleaned"] = np.where(
+        "Cavity" in hornsey_asset_list["Wall type"],
+        "Cavity",
+        "Solid"
+    )
+
+    missed_uprns = {
+        "Flat 13A Stowell House": 100021213098,
+        "Flat 24 Stowell House": 100021213110,
+        "Flat 1 36 Haringey Park": None
+    }
+    extracted_data = []
+    asset_list = []
+    hornsey_asset_list["row_id"] = hornsey_asset_list.index
+    for _, home in tqdm(hornsey_asset_list.iterrows(), total=len(hornsey_asset_list)):
+
+        if home["Address letter or number"] == "Flat 1 36 Haringey Park":
+            continue
+
+        # Some properties do not have an epc
+        if not home["Energy starting band (EPC)"]:
+            asset_list.append(
+                {
+                    "uprn": missed_uprns[home["Address letter or number"]],
+                    "address": home["Address letter or number"],
+                    "postcode": home["Postcode"],
+                    "property_type": "Flat",  # They're all flats
+                }
+            )
+            continue
+
+        unit_number = home["Address letter or number"]
+        street = home["Street address"]
+        postcode = home["Postcode"]
+        address = ", ".join([x for x in [unit_number, street] if x])
+        find_epc_searcher = RetrieveFindMyEpc(address=address, postcode=postcode)
+        find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+        time.sleep(0.5)
+        # We need uprn
+        searcher = SearchEpc(
+            address1=address,
+            postcode=postcode,
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            full_address=address,
+        )
+        searcher.find_property(skip_os=True)
+        newest_epc = searcher.newest_epc
+        if newest_epc["current-energy-efficiency"] != home["Energy starting band (EPC)"].split("-")[1]:
+            raise Exception("Something went wrong with the EPC data")
+
+        extracted_data.append(
+            {
+                "uprn": newest_epc["uprn"],
+                **find_epc_data,
+                "hotwater-description": newest_epc["hotwater-description"],
+            }
+        )
+
+        asset_list.append(
+            {
+                "uprn": newest_epc["uprn"],
+                "row_id": home["row_id"],
+                "address": home["Address letter or number"],
+                "postcode": home["Postcode"],
+                "property_type": "Flat",  # They're all flats
+            }
+        )
+
+    # Get conservation area data
+    # uprns = [x["uprn"] for x in extracted_data]
+    # conservation_area_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev")
+    #
+    # addresses = pd.DataFrame(asset_list)
+    # addresses["uprn"] = addresses["uprn"].astype(int)
+    # conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN")
+    # conservation_area_df.to_csv(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/hornsey_conservation_area_data.csv"
+    # )
+
+    # We format the extracted data so that is has the same structure as non-intrusive recommendations
+    # We then get the UPRNs and create the asset list
+
+    non_invasive_recommendations = [
+        {
+            "uprn": r["uprn"],
+            "recommendations": r["recommendations"]
+        } for r in extracted_data
+    ]
+    for r in non_invasive_recommendations:
+        new_recommendations = []
+        extracted = [r for r in extracted_data if r["uprn"] == r["uprn"]][0]
+        for rec in r["recommendations"]:
+            if extracted["hotwater-description"] == "Gas boiler/circulator, no cylinder thermostat":
+                if rec["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]:
+                    continue
+            rec["survey"] = False
+            new_recommendations.append(rec)
+        r["recommendations"] = new_recommendations
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(asset_list),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store the non-invasive recommendations in s3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increasing EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "valuation_file_path": "",
+        "scenario_name": "Wave 3 Packages",
+        "multi_plan": True,
+        "budget": None,
+        "exclusions": ["boiler_upgrade"]
+    }
+    print(body)
+
+
+def caha():
+    caha_asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Copy of AIHA - WHSHF Wave 3 bid - Consortium "
+        "member properties - CAHA.xlsx",
+        sheet_name="Ksquared-All units information",
+        header=3
+    )
+
+    caha_asset_list = caha_asset_list.iloc[1:]
+    # Fill NA values with empty strings
+    caha_asset_list = caha_asset_list.fillna("")
+    caha_asset_list["Address letter or number"] = caha_asset_list["Address letter or number"].astype(
+        str
+    ).str.strip()
+
+    # We Add POstcode as it wasn't populated - split on space and take the last two entries and re-concatenate on space
+    caha_asset_list["Street address"] = caha_asset_list["Street address"].str.strip()
+    caha_asset_list["Postcode"] = caha_asset_list["Street address"].str.split(" ").str[-2:].str.join(" ")
+    # Take just the columns we need
+    caha_asset_list = caha_asset_list[["Address letter or number", "Street address", "Postcode"]]
+
+    for col in ["Address letter or number", "Street address", "Postcode"]:
+        caha_asset_list[col] = caha_asset_list[col].str.replace("  ", " ")
+
+    # Pull the data from find my epc
+    remap = {
+        "Flat A, 50 Talbot Road N6 4QP": "50a Talbot Road",
+        "Flat A, 51 First Avenue EN1 1BN": "51a, First Avenue",
+        "Flat B, 51 First Avenue EN1 1BN": "51b, First Avenue"
+    }
+
+    def remap_address(address):
+        # Match patterns like 'Flat A, 30 Grove Park Road'
+        match = re.match(r'Flat (\w), (\d+) (.+)', address)
+        if match:
+            flat_letter = match.group(1)  # e.g., 'A'
+            number = match.group(2)  # e.g., '30'
+            rest_of_address = match.group(3)  # e.g., 'Grove Park Road'
+
+            # Format the new address as '30A Grove Park Road'
+            return f"{number}{flat_letter} {rest_of_address}"
+
+        # If pattern doesn't match, return original address
+        return address
+
+    caha_asset_list["row_id"] = caha_asset_list.index
+
+    extracted_data = []
+    asset_list = []
+    for _, home in tqdm(caha_asset_list.iterrows(), total=len(caha_asset_list)):
+        if home["Street address"] == "35 Stanford road N11 3HY" and home["Address letter or number"] == "":
+            continue
+
+        if home["Street address"] == "29 Victoria Avenue N3 1BD" and home["Address letter or number"] == "":
+            continue
+
+        if home["Street address"] == "11 Victoria Avenue N3 1BD" and home["Address letter or number"] == "Flat A":
+            continue
+
+        if home["Street address"] == "11 Victoria Avenue N3 1BD" and home["Address letter or number"] == "Flat C":
+            continue
+
+        if home["Street address"] == "10 Forest Gardens N17 6XA" and home["Address letter or number"] == "Flat C":
+            continue
+
+        if home["Street address"] == "219 Cann Hall Road E11 3NJ" and home["Address letter or number"] == "Flat B":
+            continue
+
+        unit_number = home["Address letter or number"]
+        street = home["Street address"]
+        postcode = home["Postcode"]
+        address = ", ".join([x for x in [unit_number, street] if x])
+        address = remap.get(address, address)
+        address = address.replace(postcode, "").strip()
+        if "Victoria Avenue" not in address:
+            address = remap_address(address)
+
+        find_epc_searcher = RetrieveFindMyEpc(address=address, postcode=postcode)
+        find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data(sap_2012_date=EARLIEST_EPC_DATE)
+        time.sleep(0.5)
+        # We need uprn
+        searcher = SearchEpc(
+            address1=address,
+            postcode=postcode,
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            full_address=address,
+        )
+        searcher.find_property(skip_os=True)
+        newest_epc = searcher.newest_epc
+
+        uprn = newest_epc["uprn"]
+        if address in ["Flat D, 11 Victoria Avenue", "Flat B, 11 Victoria Avenue"]:
+            uprn = None
+
+        extracted_data.append(
+            {
+                "uprn": uprn,
+                **find_epc_data,
+            }
+        )
+
+        asset_list.append(
+            {
+                "row_id": home["row_id"],
+                "uprn": uprn,
+                "address": address,
+                "postcode": home["Postcode"],
+                "property_type": newest_epc["property-type"],
+                "wall_type": newest_epc["walls-description"],
+                "built_form": newest_epc["built-form"],
+                "flat_storey_count": newest_epc['flat-storey-count'],
+            }
+        )
+
+    # Missing row ids
+    missed = [r for r in caha_asset_list["row_id"].tolist() if r not in [x["row_id"] for x in asset_list]]
+
+    no_data = [x for x in asset_list if x["uprn"] in [None, ""]]
+    no_data = pd.DataFrame(no_data)
+
+    # Get conservation area data
+    uprns = [x["uprn"] for x in extracted_data if x["uprn"] not in ["", None]]
+    conservation_area_data = OpenUprnClient.get_spatial_data([36284], "retrofit-data-dev")
+
+    addresses = pd.DataFrame(asset_list)
+    addresses["uprn"] = addresses["uprn"].astype(str)
+    conservation_area_data["UPRN"] = conservation_area_data["UPRN"].astype(str)
+    conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN")
+    conservation_area_df.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_conservation_area_data.csv"
+    )
+
+    non_invasive_recommendations = [
+        {
+            "uprn": r["uprn"],
+            "recommendations": r["recommendations"]
+        } for r in extracted_data
+    ]
+    # for r in non_invasive_recommendations:
+    #     new_recommendations = []
+    #     extracted = [r for r in extracted_data if r["uprn"] == r["uprn"]][0]
+    #     for rec in r["recommendations"]:
+    #         if extracted["hotwater-description"] == "Gas boiler/circulator, no cylinder thermostat":
+    #             if rec["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]:
+    #                 continue
+    #         rec["survey"] = False
+    #         new_recommendations.append(rec)
+    #     r["recommendations"] = new_recommendations
+
+    # We model the two properties separately
+    asset_list = pd.DataFrame(asset_list)
+    # Drop Flat D, 11 Victoria Avenue
+    asset_list1 = asset_list[asset_list["address"] != "Flat D, 11 Victoria Avenue"]
+    asset_list2 = asset_list[asset_list["address"] == "Flat D, 11 Victoria Avenue"]
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list1.csv"
+    save_csv_to_s3(
+        dataframe=asset_list1,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    filename2 = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list2.csv"
+    save_csv_to_s3(
+        dataframe=asset_list2,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename2
+    )
+
+    # Store the non-invasive recommendations in s3
+    non_invasive_recommendations_filename = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    body = {
+        "portfolio_id": str(CAHA_PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increasing EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "valuation_file_path": "",
+        "scenario_name": "Wave 3 Packages",
+        "multi_plan": True,
+        "budget": None,
+        "exclusions": ["boiler_upgrade"]
+    }
+    print(body)
+
+    body2 = {
+        "portfolio_id": str(CAHA_PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increasing EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename2,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "valuation_file_path": "",
+        "scenario_name": "Wave 3 Packages",
+        "multi_plan": True,
+        "budget": None,
+        "exclusions": ["boiler_upgrade"]
+    }
+    print(body2)
+
+    #
+    asset_list3 = [
+        {
+            "address": "10b Forest Gardens", "postcode": "N17 6XA", "uprn": 100021180197
+        }
+    ]
+    filename3 = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list3.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(asset_list3),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename3
+    )
+    body3 = {
+        "portfolio_id": str(119),
+        "housing_type": "Social",
+        "goal": "Increasing EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename3,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "valuation_file_path": "",
+        "scenario_name": "Wave 3 Packages",
+        "multi_plan": True,
+        "budget": None,
+        "exclusions": ["boiler_upgrade"]
+    }
+    print(body3)
--- a/etl/customers/l_and_g/ic_asset_list.py
+++ b/etl/customers/l_and_g/ic_asset_list.py
@ -0,0 +1,166 @@
+"""
+This script prepares the asset list for modelling the properties from the L&G dataset, for their January IC
+"""
+
+import pandas as pd
+import numpy as np
+
+from etl.route_march_data_pull.app import get_data
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 124
+USER_ID = 8
+
+
+def app():
+    """
+    Build the L&G Basildon asset list and upload the modelling inputs to S3.
+
+    Steps, in order:
+    1. Read the "Basildon" sheet of the MDS workbook and derive ``address1`` / ``full_address``.
+    2. Look up EPC data for every row via ``get_data``.
+    3. For the rows reported back in ``no_epc``, attach the customer's property type and the UPRNs held in
+       ``missed_epcs_uprn.csv``.
+    4. Stack both sets into one asset list, then upload it and the non-invasive recommendations to S3.
+    5. Print the scenario payload that references the uploaded files.
+
+    :return: None
+    """
+    # The last three rows of the sheet are discarded
+    asset_data = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon information for Domna/Basildon MDS v1.4 "
+        "(1).xlsx",
+        sheet_name="Basildon",
+        header=5
+    ).head(-3)
+
+    # When "Address 1" is empty, "Address 2" takes its place as the first address line
+    first_line_missing = pd.isnull(asset_data["Address 1"])
+    address_tail = asset_data["Address 2"] + ", " + asset_data["Address 3"]
+
+    asset_data["address1"] = np.where(first_line_missing, asset_data["Address 2"], asset_data["Address 1"])
+    asset_data["full_address"] = np.where(
+        first_line_missing,
+        address_tail,
+        asset_data["Address 1"] + ", " + address_tail,
+    )
+
+    asset_list = asset_data[["address1", "PostCode", "full_address", "Bedrooms"]].reset_index(drop=True)
+    asset_list["row_id"] = asset_list.index
+
+    # L&G's focus:
+    # Measures: loft and cavity insulation, replacement thermally efficient windows, PV cells, AS heat pumps.
+
+    epc_data, _errors, no_epc = get_data(
+        asset_list=asset_list,
+        fulladdress_column="full_address",
+        address1_column="address1",
+        postcode_column="PostCode",
+        manual_uprn_map={}
+    )
+
+    # --- Properties with an EPC: keep the identifiers from the EPC data, plus the bedroom count ---
+    epc_df = pd.DataFrame(epc_data)
+    fetched_asset_list = epc_df[["address1", "postcode", "uprn", "row_id"]].merge(
+        asset_list[["row_id", "Bedrooms"]],
+        how="left",
+        on=["row_id"]
+    ).drop(columns=["row_id"])
+
+    # --- Properties without an EPC: fall back to the customer's own property type ---
+    property_types = asset_data[["address1", "PostCode", "Property Type"]]
+    missed = asset_list[asset_list["row_id"].isin(no_epc)].merge(
+        property_types,
+        how="left",
+        on=["address1", "PostCode"]
+    )
+    # "Block: Residential" is remapped to "Flat"
+    is_residential_block = missed["Property Type"] == "Block: Residential"
+    missed["Property Type"] = np.where(is_residential_block, "Flat", missed["Property Type"])
+    missed = missed.rename(columns={"PostCode": "postcode"}).drop(columns=["row_id"])
+
+    # UPRNs for the missed properties come from a separate CSV
+    # (originally exported with: missed.to_csv(".../L&G/missed_epcs.csv"))
+    missed_uprns = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/missed_epcs_uprn.csv",
+    )
+    uprn_lookup = missed_uprns[["address1", "postcode", "UPRN"]].rename(columns={"UPRN": "uprn"})
+    missed = missed.merge(uprn_lookup, how="left", on=["address1", "postcode"])
+
+    # Stack the two sets and switch to the column names used in the uploaded asset list
+    final_asset_list = pd.concat(
+        [fetched_asset_list, missed[["address1", "postcode", "Property Type", "Bedrooms", "uprn"]]]
+    ).rename(
+        columns={
+            "address1": "address",
+            "Property Type": "property_type",
+            "Bedrooms": "n_bedrooms"
+        }
+    )
+
+    # One entry per EPC record: its UPRN and the recommendations found for it
+    non_invasive_recommendations = [
+        {"uprn": record["uprn"], "recommendations": record["find_my_epc_data"]["recommendations"]}
+        for record in epc_data
+    ]
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(final_asset_list),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # No valuations file is uploaded here, so "valuation_file_path" is left empty below
+    included_measures = [
+        "cavity_wall_insulation",
+        "loft_insulation",
+        "windows",
+        "solar_pv",
+        "air_source_heat_pump"
+    ]
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "valuation_file_path": "",
+        "scenario_name": "Retrofit Packages",
+        "multi_plan": True,
+        "budget": None,
+        "inclusions": included_measures
+    }
+    print(body)
--- a/etl/customers/l_and_g/ic_slides.py
+++ b/etl/customers/l_and_g/ic_slides.py
@ -0,0 +1,246 @@
+import pandas as pd
+from backend.app.utils import sap_to_epc
+
+# Exploratory breakdowns for the L&G slides. The bare ``value_counts()`` expressions only display
+# something when the file is run interactively; the commented tables record the output seen at the time.
+data = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/property_202501170837.csv"
+)
+
+data["year_built"].value_counts()
+
+# 1950-1966     26
+# 1967-1975     37
+# 1976-1982     37
+# 1983-1990     33
+# 1991-1995    139
+# 1996-2002     42
+# 2003-2006     50
+
+# Combined label, e.g. "House: Mid-Terrace"
+data["full_property_type"] = data["property_type"] + ": " + data["built_form"]
+
+houses = data[data["property_type"].isin(["House", "Bungalow"])]
+houses["built_form"].value_counts()
+
+data["property_type"].value_counts()
+data["full_property_type"].value_counts()
+# House: Mid-Terrace           136
+# House: End-Terrace            83
+# House: Semi-Detached          55
+# Flat: Semi-Detached           24
+# Flat: End-Terrace             19
+# House: Detached               10
+# Flat: Mid-Terrace              9
+# Maisonette: Mid-Terrace        9
+# Maisonette: Semi-Detached      8
+# Maisonette: End-Terrace        6
+# Flat: Detached                 4
+# Bungalow: Detached             1
+
+epc_data = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/basildon EPC Data.csv"
+)
+
+# Classify floor area in <73m2, 73-98, 99-200, 200+
+# A missing floor area is left unclassified (None): every "<" comparison against NaN is False, so without
+# the explicit guard a missing value would fall through to the "200+" bracket.
+epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(
+    lambda x: (
+        None if pd.isnull(x)
+        else "<73" if x < 73
+        else "73-98" if x < 99
+        else "99-200" if x < 200
+        else "200+"
+    )
+)
+
+# 73-98     185
+# <73       156
+# 99-200     23
+
+# The wall type is the text before the first comma of the "walls" description
+epc_data["wall_type"] = epc_data["walls"].str.split(",").str[0]
+epc_data["wall_type"].value_counts()
+
+# Cavity wall     343
+# Timber frame     15
+# System built      6
+
+# we pull some additional data
+# We want:
+# 1) The list of properties included in the portfolio, with uprn
+# 2) The recommendations against each property with costs, and whether or not the recommendation was defaulted
+# 3) The properties without recommendations and why
+
+from tqdm import tqdm
+import pandas as pd
+import numpy as np
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+
+
+def get_data(portfolio_id, scenario_ids):
+    """
+    Load the properties of a portfolio, the plans of the given scenarios and their default recommendations.
+
+    :param portfolio_id: Portfolio whose properties (joined to their EPC details) are fetched.
+    :param scenario_ids: Iterable of scenario ids; plans are selected by ``Plan.scenario_id``.
+    :return: Tuple ``(properties_data, plans_data, recommendations_data)``, each a list of dicts keyed by
+        table column name. Recommendation dicts carry an extra "Scenario ID" key.
+    """
+    session = sessionmaker(bind=db_engine)()
+    # try/finally so the session is released even when one of the queries raises
+    try:
+        session.begin()
+
+        # Get properties and their details for a specific portfolio
+        properties_query = session.query(
+            PropertyModel,
+            PropertyDetailsEpcModel
+        ).join(
+            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
+        ).filter(
+            PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
+        ).all()
+
+        # Flatten each (property, details) pair into one dict. Where both tables share a column name,
+        # the value from PropertyDetailsEpcModel wins because it is unpacked last.
+        properties_data = [
+            {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
+             **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
+                PropertyDetailsEpcModel.__table__.columns}}
+            for prop in properties_query
+        ]
+
+        # Get the plans of the requested scenarios (not restricted to the properties fetched above)
+        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+
+        # Transform plans data to include all fields dynamically
+        plans_data = [
+            {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+            for plan in plans_query
+        ]
+
+        # Extract plan IDs for filtering recommendations through PlanRecommendations
+        plan_ids = [plan['id'] for plan in plans_data]
+
+        # Get recommendations through PlanRecommendations for those plans and that are default
+        recommendations_query = session.query(
+            Recommendation,
+            Plan.scenario_id
+        ).join(
+            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
+        ).join(
+            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
+        ).filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            Recommendation.default == True  # Filtering for default recommendations
+        ).all()
+
+        # Transform recommendations data to include all fields dynamically and include scenario_id
+        recommendations_data = []
+        for rec in recommendations_query:
+            # Each result row exposes the entity as ``rec.Recommendation``; fall back to the row itself otherwise
+            entity = getattr(rec, 'Recommendation', rec)
+            record = {col.name: getattr(entity, col.name) for col in Recommendation.__table__.columns}
+            record["Scenario ID"] = rec.scenario_id
+            recommendations_data.append(record)
+    finally:
+        session.close()
+
+    return properties_data, plans_data, recommendations_data
+
+
+# Build the per-property export: property attributes, one cost / flag column pair per measure,
+# and the predicted post-works SAP score and EPC band.
+properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
+
+properties_df = pd.DataFrame(properties_data)
+plans_df = pd.DataFrame(plans_data)
+recommendations_df = pd.DataFrame(recommendations_data)
+
+# measure_type value -> label used in the exported "Cost: ..." / "Recommendation: ..." column headings
+measure_labels = {
+    "air_source_heat_pump": "Air Source Heat Pump",
+    "cavity_wall_insulation": "Cavity Wall Insulation",
+    "double_glazing": "Double Glazing",
+    "loft_insulation": "Loft Insulation",
+    "mechanical_ventilation": "Ventilation",
+    "solar_pv": "Solar PV",
+}
+
+# Only rows flagged as default are used below
+is_default = recommendations_df["default"]
+recommended_measures_df = recommendations_df.loc[is_default, ["property_id", "measure_type", "estimated_cost"]]
+
+# Sum up the sap points by property id
+post_install_sap = recommendations_df.loc[is_default, ["property_id", "sap_points"]]
+post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
+
+# One row per property, one estimated-cost column per measure type; 0 where the measure is absent
+recommendations_measures_pivot = recommended_measures_df.pivot(
+    index='property_id',
+    columns='measure_type',
+    values='estimated_cost'
+).reset_index().rename(
+    columns={measure: f"Cost: {label}" for measure, label in measure_labels.items()}
+).fillna(0)
+
+# A measure counts as recommended when it has a positive cost
+for label in measure_labels.values():
+    recommendations_measures_pivot[f"Recommendation: {label}"] = (
+        recommendations_measures_pivot[f"Cost: {label}"] > 0
+    )
+
+property_columns = [
+    "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
+    "current_epc_rating",
+    "current_sap_points", "total_floor_area", "number_of_rooms",
+]
+df = properties_df[property_columns].merge(
+    recommendations_measures_pivot, how="left", on="property_id"
+).merge(
+    post_install_sap, how="left", on="property_id"
+)
+
+df = df.drop(columns=["property_id"])
+df["sap_points"] = df["sap_points"].fillna(0)
+
+df = df.rename(
+    columns={
+        "uprn": "UPRN",
+        "address": "Address",
+        "postcode": "Postcode",
+        "walls": "Walls",
+        "roof": "Roof",
+        "heating": "Heating",
+        "windows": "Windows",
+        "current_epc_rating": "Current EPC Rating",
+        "current_sap_points": "Current SAP Points",
+        "total_floor_area": "Total Floor Area",
+        "number_of_rooms": "Number of Habitable Rooms",
+        "floor_height": "Floor Height",
+    }
+)
+
+# The pivot was zero-filled before the merge, so a null cost here means the property had no pivot row
+df["Has Recommendations"] = df["Cost: Air Source Heat Pump"].notnull()
+
+# We fill missings:
+for label in measure_labels.values():
+    df[f"Recommendation: {label}"] = df[f"Recommendation: {label}"].fillna(False)
+    df[f"Cost: {label}"] = df[f"Cost: {label}"].fillna(0)
+
+# Calculate post SAP
+df["Predicted Post Works SAP"] = (df["Current SAP Points"] + df["sap_points"]).round()
+df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(sap_to_epc)
+
+df["Recommendation: Air Source Heat Pump"].sum()
+df["Cost: Air Source Heat Pump"].sum()
+
+df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)
--- a/etl/customers/lambeth/re-knocks.py
+++ b/etl/customers/lambeth/re-knocks.py
@ -0,0 +1,23 @@
+import pandas as pd
+
+# Explore the free-text notes recorded against Lambeth re-knock visits whose outcome is "See notes"
+data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Downloads/Lambeth Reknocks.xlsx",
+    sheet_name="Possible Route",
+    header=1,
+)
+
+data["Outcomes"].value_counts()
+
+# Strip out: No
+
+# Full heading of the notes column in the sheet
+notes_column = (
+    "Notes (If 'no answer' under outcomes, have you checked around the property for access issues where "
+    "possible?)"
+)
+
+df = data.loc[data["Outcomes"] == "See notes"]
+
+# Frequency of each distinct note
+notes_df = df[notes_column].value_counts().to_frame()
+
+# Rows carrying one specific access-issue note
+example_note = (
+    'Access to rear of property only through number 10. Overgrown athe rear of property '
+    'installer wont be able to access'
+)
+example = df[df[notes_column] == example_note]
+
+# 18 did not attend
+#
--- a/etl/customers/livewest/route_march_2024_10_28.py
+++ b/etl/customers/livewest/route_march_2024_10_28.py
@ -0,0 +1,225 @@
+import os
+import time
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def get_data(asset_list):
+    """
+    Fetch the newest EPC, and its recommendations, for each row of ``asset_list``.
+
+    Rows for which no EPC is found are skipped and appear in neither returned list.
+
+    :param asset_list: DataFrame with "Postcode", "Number", "Full Address" and "row_id" columns.
+    :return: Tuple ``(epc_data, errors)``. ``epc_data`` is a list of EPC dicts, each extended with "row_id"
+        and "recommendations"; ``errors`` is the list of row_ids whose lookup raised.
+    """
+    epc_data = []
+    errors = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        try:
+            postcode = home["Postcode"]
+            house_number = home["Number"]
+            full_address = home["Full Address"]
+
+            searcher = SearchEpc(
+                address1=str(house_number),
+                postcode=postcode,
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address,
+                max_retries=5
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                continue
+
+            # Look for EPC recommendations; a failed lookup is treated as "no recommendations".
+            # ``except Exception`` (not a bare ``except``) so Ctrl-C / SystemExit still stop the run.
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:
+                property_recommendations = {"rows": []}
+
+            epc = {
+                "row_id": home["row_id"],
+                **searcher.newest_epc.copy(),
+                "recommendations": property_recommendations["rows"]
+            }
+
+            epc_data.append(epc)
+        except Exception:
+            # Record the row so the caller can retry it, then pause before the next lookup
+            errors.append(home["row_id"])
+            time.sleep(5)
+
+    return epc_data, errors
+
+
+def app():
+    """
+    Pull EPC data for a list of properties owned by Livewest and export the result to Excel.
+
+    Data request contents:
+    Date of last EPC
+    Reason for EPC
+    SAP score on register
+    Property Type
+    Property Area
+    Property Age
+    Any Dimensions (HLP,PW,RH)
+    Property Wall Construction
+    Heating Type
+    Secondary Heating
+    Loft Insulation Depth
+
+    Additional if possible:
+    Heat loss calculations
+    EPC recommendations
+    Property UPRN
+
+    :return: None. The enriched asset list is written to an .xlsx file in the working directory.
+    """
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/LIVEWEST 3578 ECO4 ECO PLUS GBIS.xlsx", header=0
+    )
+    asset_list["row_id"] = asset_list.index
+
+    epc_data, errors = get_data(asset_list)
+
+    # Retry the rows whose first lookup raised; rows failing a second time are dropped
+    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
+    epc_data_failed, _ = get_data(asset_list_failed)
+
+    # Append the retried data to the main data
+    epc_data.extend(epc_data_failed)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # We expand out the recommendations into one boolean column per distinct improvement text
+    recommendations_df = epc_df[["row_id", "recommendations"]]
+
+    unique_recommendations = set()
+    for _, row in recommendations_df.iterrows():
+        unique_recommendations.update(rec["improvement-summary-text"] for rec in row["recommendations"])
+
+    # Sorted so the exported column order is identical on every run (set iteration order is not)
+    columns = ["row_id"] + sorted(unique_recommendations, key=str)
+    transformed_data = []
+    for _, row in recommendations_df.iterrows():
+        # Initialize a dictionary for this row with False for all recommendations
+        row_data = {col: False for col in columns}
+        row_data["row_id"] = row["row_id"]
+
+        # Set True for each recommendation present in this row
+        for rec in row["recommendations"]:
+            recommendation_text = rec["improvement-summary-text"]
+            row_data[recommendation_text] = True
+
+        # Append the row data to transformed_data
+        transformed_data.append(row_data)
+
+    transformed_df = pd.DataFrame(transformed_data)
+    # Drop the column for an empty summary text; errors="ignore" because it only exists when at least
+    # one recommendation came back with an empty "improvement-summary-text"
+    transformed_df = transformed_df.drop(columns=[""], errors="ignore")
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "row_id",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description",
+            #
+            "energy-consumption-current",  # kwh/m2
+        ]
+    ]
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        on="row_id"
+    ).merge(
+        transformed_df,
+        how="left",
+        on="row_id"
+    )
+
+    asset_list = asset_list.drop(columns=["row_id"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC",
+        "energy-consumption-current": "Heat Demand (kWh/m2)"
+    })
+
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
+            x["Property Type"]) else None, axis=1
+    )
+
+    # Coerce to float; empty strings, None and anything else non-numeric become NaN. This replaces
+    # ``.replace("", None)`` + ``.astype(float)``, whose handling of a None replacement value has differed
+    # between pandas versions and which was not applied to the floor area at all.
+    for numeric_column in ["Property Floor Area", "Number of Habitable Rooms"]:
+        asset_list[numeric_column] = pd.to_numeric(asset_list[numeric_column], errors="coerce").astype(float)
+
+    # Floor area and room count are divided by the floor count before being passed to the estimator
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+
+    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            # A falsy floor height (e.g. an empty string) falls back to 2.5
+            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ),
+        axis=1
+    )
+
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
+            x["Roof Construction"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    filename = "livewest EPC Data pull - 29 Oct.xlsx"
+    asset_list.to_excel(filename, index=False)
--- a/etl/customers/mod/pilot/1.
+++ b/etl/customers/mod/pilot/1.
@ -0,0 +1,205 @@
+import os
+import pandas as pd
+from tqdm import tqdm
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from etl.spatial.OpenUprnClient import OpenUprnClient
+from asset_list.utils import get_data
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 139
+USER_ID = 8
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    Prepare the MOD pilot modelling inputs from the sample list and the property attribution data.
+
+    Looks up the newest EPC for every sample property in England or Wales that has a UPRN, joins the
+    EPC, attribution and spatial data, uploads the asset list and non-invasive recommendations to S3
+    and prints one payload per scenario.
+
+    :return: None
+    """
+    folder_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme"
+    sample_list = pd.read_excel(f"{folder_path}/20250227_DIO_Accommodation_Sample_Properties.xlsx")
+    asset_data = pd.read_excel(f"{folder_path}/20250303_DIO_Accommodation_Property_Attribution.xlsx")
+
+    in_scope = sample_list["BLDNG_COUNTRY_NAME"].isin(["ENGLAND", "WALES"])
+    sample_list = sample_list[in_scope]
+
+    # Merge on the UPRN
+    building_uprns = asset_data[["BLDNG_ID", "BLNDG_GOVERMENT_UPRN"]].drop_duplicates()
+    sample_list = sample_list.merge(building_uprns, how="left", on="BLDNG_ID")
+    sample_list["BLNDG_GOVERMENT_UPRN"] = sample_list["BLNDG_GOVERMENT_UPRN"].astype("Int64")
+
+    # Use the EPC API to get corrected postcodes
+    epc_records = []
+    missed = []  # UPRNs for which no EPC was found
+    for _, sample_row in tqdm(sample_list.iterrows(), total=len(sample_list)):
+        uprn = sample_row["BLNDG_GOVERMENT_UPRN"]
+        if pd.isnull(uprn):
+            continue
+
+        searcher = SearchEpc(
+            address1="",
+            postcode="",
+            uprn=uprn,
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=""
+        )
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            missed.append(uprn)
+        else:
+            epc_records.append(searcher.newest_epc)
+
+    model_asset_list = pd.DataFrame(epc_records)
+    model_asset_list["uprn"] = model_asset_list["uprn"].astype(int)
+
+    spatial_data = OpenUprnClient.get_spatial_data(
+        uprns=model_asset_list["uprn"].tolist(), bucket_name="retrofit-data-dev"
+    )
+    spatial_df = pd.DataFrame(spatial_data).rename(columns={"UPRN": "uprn"})
+
+    # We determine if the building is listed, heritage or in a conservation area
+
+    # Property features, without the two dropped columns, de-duplicated
+    features = asset_data.drop(
+        columns=["BUILDING_SYSTEM_ITEM_NAME", "OBSERVED_CONDITION_DESCRIPTION"]
+    ).drop_duplicates()
+
+    # Inner join keeps only properties that have an EPC; spatial data is attached where available
+    df = features.merge(
+        model_asset_list, how="inner", right_on="uprn", left_on="BLNDG_GOVERMENT_UPRN"
+    ).merge(
+        spatial_df, how="left", on="uprn"
+    )
+
+    # To keep a local copy: df.to_csv(folder_path + "/MOD property data.csv", index=False)
+
+    # Produce an asset list for analysis
+    df["row_id"] = df.index
+
+    epc_data, _errors, _no_epc = get_data(
+        df=df,
+        manual_uprn_map={},
+        epc_auth_token=EPC_AUTH_TOKEN,
+        uprn_column="uprn",
+        fulladdress_column="address",
+        address1_column="address1",
+        postcode_column="postcode",
+        property_type_column=None,
+        built_form_column=None,
+        epc_api_only=False,
+        row_id_name="row_id",
+    )
+
+    non_invasive_recommendations = [
+        {"uprn": record["uprn"], "recommendations": record["find_my_epc_data"]["recommendations"]}
+        for record in epc_data
+    ]
+
+    # also include the floor area
+    asset_list_columns = ["uprn", "address1", "postcode", "NUMBER_OF_BEDROOMS", "BLDNG_STOREYS_QTY", "BLDNG_MSRMNT_VAL"]
+    asset_list = df[asset_list_columns].rename(
+        columns={
+            "address1": "address",
+            "NUMBER_OF_BEDROOMS": "n_bedrooms",
+            "BLDNG_STOREYS_QTY": "number_of_floors",
+            "BLDNG_MSRMNT_VAL": "floor_area"
+        }
+    )
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store the non-invasive recommendations in s3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    def scenario_payload(goal_value, scenario_name, **extra):
+        # The scenarios differ only in goal value, name and optional extra keys (appended last).
+        # No "inclusions" restriction is set for any of them.
+        return {
+            "portfolio_id": str(PORTFOLIO_ID),
+            "housing_type": "Private",
+            "goal": "Increasing EPC",
+            "goal_value": goal_value,
+            "trigger_file_path": filename,
+            "already_installed_file_path": "",
+            "patches_file_path": "",
+            "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+            "valuation_file_path": "",
+            "scenario_name": scenario_name,
+            "multi_plan": True,
+            "budget": None,
+            **extra,
+        }
+
+    # Scenario 1 - EPC C
+    body = scenario_payload("C", "Hit EPC C")
+    print(body)
+
+    # Scenario 2 - EPC B
+    body = scenario_payload("B", "Hit EPC B")
+    print(body)
+
+    # Scenario 3 - EPC B, 3.5 COP ASHP
+    body = scenario_payload("B", "Hit EPC B - 3.5 COP ASHP", ashp_cop=3.5)
+    print(body)
--- a/etl/customers/mod/pilot/2.
+++ b/etl/customers/mod/pilot/2.
@ -0,0 +1,652 @@
+from pprint import pprint
+import pandas as pd
+import numpy as np
+from backend.app.utils import sap_to_epc
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+
+
+def get_data(portfolio_id, scenario_ids):
+    session = sessionmaker(bind=db_engine)()
+    session.begin()
+
+    # Get properties and their details for a specific portfolio
+    properties_query = session.query(
+        PropertyModel,
+        PropertyDetailsEpcModel
+    ).join(
+        PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
+    ).filter(
+        PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
+    ).all()
+
+    # Transform properties data to include all fields dynamically
+    properties_data = [
+        {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
+         **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
+            PropertyDetailsEpcModel.__table__.columns}}
+        for prop in properties_query
+    ]
+
+    # Get property IDs from fetched properties
+
+    # Get plans linked to the fetched properties
+    plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+
+    # Transform plans data to include all fields dynamically
+    plans_data = [
+        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        for plan in plans_query
+    ]
+
+    # Extract plan IDs for filtering recommendations through PlanRecommendations
+    plan_ids = [plan['id'] for plan in plans_data]
+
+    # Get recommendations through PlanRecommendations for those plans and that are default
+    recommendations_query = session.query(
+        Recommendation,
+        Plan.scenario_id
+    ).join(
+        PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
+    ).join(
+        Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
+    ).filter(
+        PlanRecommendations.plan_id.in_(plan_ids),
+        Recommendation.default == True  # Filtering for default recommendations
+    ).all()
+
+    # Transform recommendations data to include all fields dynamically and include scenario_id
+    recommendations_data = [
+        {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation')
+        else getattr(rec, col.name) for
+            col in Recommendation.__table__.columns},
+         "Scenario ID": rec.scenario_id}
+        for rec in recommendations_query
+    ]
+
+    session.close()
+
+    return properties_data, plans_data, recommendations_data
+
+
+def app():
+    """
+    Given a portfolio and a scenario, this function prepares an excel model to present the data
+    """
+
+    # Set the inputs:
+    portfolio_id = 139
+    scenario_ids = [237, 238]
+
+    properties_data, plans_data, recommendations_data = get_data(
+        portfolio_id=portfolio_id, scenario_ids=scenario_ids
+    )
+
+    properties_df = pd.DataFrame(properties_data)
+    plans_df = pd.DataFrame(plans_data)
+    recommendations_df = pd.DataFrame(recommendations_data)
+
+    # Merge on the original data
+    mod_property_data = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD property data.csv"
+    )
+
+    property_asset_data = properties_df.merge(
+        mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn"
+    )
+
+    property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False)
+    property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970
+    property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip()
+    property_asset_data["is_insulated"] = (
+        property_asset_data["walls"].str.split(",").str[1].str.strip().isin(
+            ["filled cavity", "with external insulation", "filled cavity and external insulation"]
+        ) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"])
+    )
+    property_asset_data["is_insulated"] = np.where(
+        property_asset_data["is_insulated"], "Insulated", "Uninsulated"
+    )
+    property_asset_data["is_pitched"] = np.where(
+        property_asset_data["is_pitched"], "Pitched roof", "Not Pitched Roof"
+    )
+    property_asset_data["pre_1970"] = np.where(
+        property_asset_data["pre_1970"], "Pre 1970", "Post 1970"
+    )
+
+    archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"]
+
+    assigned_archetypes = (
+        property_asset_data.groupby(
+            archetype_variables
+        ).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False)
+    )
+
+    # Make the archetype ID a concatenation of the variables
+    assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply(
+        lambda x: "_".join(x.astype(str)), axis=1
+    )
+
+    # Most prominent archetypes
+    prominent_archetypes = assigned_archetypes.head(6)
+    other_archetypes = assigned_archetypes.tail(-6)
+    # 2 or fewer properties in the other archetypes
+
+    property_asset_data = property_asset_data.merge(
+        assigned_archetypes[archetype_variables + ["archetype_id"]],
+        how="left",
+        on=archetype_variables
+    )
+
+    # Create age bands:
+    # 1960-1969
+    # 1970-1979
+    # 1980-1989
+    # 1990-1999
+    # 2000+
+    property_asset_data["age_band"] = pd.cut(
+        property_asset_data["BUILD_YEAR"],
+        bins=[1959, 1969, 1979, 1989, 1999, 2022],
+        labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"]
+    )
+
+    # Create floor area bands
+    # 0-73
+    # 74-97
+    # 98-199
+    # 200+
+    property_asset_data["floor_area_band"] = pd.cut(
+        property_asset_data["total_floor_area"],
+        bins=[0, 73, 97, 199, 10000],
+        labels=["0-73", "74-97", "98-199", "200+"]
+    )
+
+    property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy()
+    property_asset_data["archetype_group"] = np.where(
+        property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values),
+        "other",
+        property_asset_data["archetype_group"]
+    )
+
+    # For colour
+    wall_types = (
+        property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename(
+            columns={"wall_type": "Wall Type"}
+        )
+    )
+    # Group into age bands
+    ages = (
+        property_asset_data[["age_band"]].value_counts()
+        .to_frame()
+        .reset_index().sort_values("age_band", ascending=True)
+        .rename(columns={"age_band": "Age Band"})
+    )
+    floor_area_bands = (
+        property_asset_data[["floor_area_band"]].value_counts()
+        .to_frame()
+        .reset_index().sort_values("floor_area_band", ascending=True)
+        .rename(columns={"floor_area_band": "Floor Area Band"})
+    )
+    archetype_counts = (
+        property_asset_data[["archetype_group"]].
+        value_counts().
+        to_frame().
+        reset_index()
+        .rename(columns={"archetype_group": "Archetype"})
+    )
+    property_types = (
+        (property_asset_data["property_type"] + ": " + property_asset_data["built_form"]).
+        value_counts().
+        to_frame().
+        reset_index()
+        .rename(columns={"index": "Property Type", 0: "Count"})
+    )
+
+    # epc breakdown
+    epc_breakdown = (
+        property_asset_data["current_epc_rating"]
+        .apply(lambda x: x.value)
+        .value_counts()
+        .to_frame()
+        .reset_index()
+    )
+
+    # Figures for the deck
+    # Carbon per property
+    totals = property_asset_data[
+        [
+            "Total_household_members",
+            "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
+            "heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
+            "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
+        ]
+    ].copy()
+    totals["total_cost"] = (
+        totals["heating_cost_current"] +
+        totals["hot_water_cost_current"] +
+        totals["lighting_cost_current"] +
+        totals["appliances_cost_current"] +
+        totals["gas_standing_charge"] +
+        totals["electricity_standing_charge"]
+    )
+    print(
+        totals[
+            [
+                "Total_household_members",
+                "co2_emissions",
+                "current_energy_demand",
+                "total_cost",
+            ]
+        ].mean()
+    )
+
+    # Store these to an excel
+    # with pd.ExcelWriter(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD archetype breakdowns.xlsx"
+    # ) as writer:
+    #     wall_types.to_excel(writer, sheet_name="Wall Types", index=False)
+    #     ages.to_excel(writer, sheet_name="Ages", index=False)
+    #     floor_area_bands.to_excel(writer, sheet_name="Floor Area Bands", index=False)
+    #     archetype_counts.to_excel(writer, sheet_name="Archetype Counts", index=False)
+    #     epc_breakdown.to_excel(writer, sheet_name="EPC Rating", index=False)
+
+    contingency = 0.26
+
+    # We prepare the outputs, by scenario
+    scenario_data = {}
+    for scenario in scenario_ids:
+
+        scenario_recommendations_df = recommendations_df[
+            recommendations_df["Scenario ID"] == scenario
+            ].copy()
+
+        scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"]
+        scenario_recommendations_df["total_cost"] = (
+            scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"]
+        )
+
+        recommended_measures_df = scenario_recommendations_df[
+            ["property_id", "measure_type", "estimated_cost", "default"]
+        ]
+
+        recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
+        recommended_measures_df = recommended_measures_df.drop(columns=["default"])
+
+        # Metrics by property ID
+        aggregated_metrics = scenario_recommendations_df[
+            [
+                "property_id", "type", "default", "sap_points",
+                "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency",
+                "total_cost"
+            ]
+        ]
+        aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
+        aggregated_metrics = aggregated_metrics.groupby("property_id")[
+            ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
+             "total_cost", "contingency"]
+        ].sum().reset_index()
+
+        recommendations_measures_pivot = recommended_measures_df.pivot(
+            index='property_id',
+            columns='measure_type',
+            values='estimated_cost'
+        )
+        recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
+        recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
+
+        # We flag with boolean if the measure is recommended
+        for c in recommendations_measures_pivot.columns:
+            if c == "property_id":
+                continue
+            recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0
+
+        # We now create a final output
+        df = properties_df[
+            [
+                "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
+                "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
+                "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
+                "heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
+                "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
+            ]
+        ].merge(
+            recommendations_measures_pivot, how="left", on="property_id"
+        ).merge(
+            aggregated_metrics, how="left", on="property_id"
+        )
+
+        df["bills_total_cost"] = (
+            df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] +
+            df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"]
+        )
+
+        df = df.drop(columns=["property_id"])
+        for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
+            df[c] = df[c].fillna(0)
+
+        df = df.rename(
+            columns={
+                "uprn": "UPRN",
+                "address": "Address",
+                "postcode": "Postcode",
+                "walls": "Walls",
+                "roof": "Roof",
+                "heating": "Heating",
+                "windows": "Windows",
+                "current_epc_rating": "Current EPC Rating",
+                "current_sap_points": "Current SAP Points",
+                "total_floor_area": "Total Floor Area",
+                "number_of_rooms": "Number of Habitable Rooms",
+                "floor_height": "Floor Height",
+            }
+        )
+
+        # Calculate post SAP
+        df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
+        df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
+        df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
+
+        # Calculate the relative savings on carbon, kwh, and bills
+        df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"]
+        df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"]
+        df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"]
+
+        # Add on the archetype
+        df = df.merge(
+            property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn"
+        )
+
+        # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler,
+        # the bills go up when recommending HHRSH, so it doesn't make it to EPC B.
+        # For mid-terrace units, use the Ordnance Survey API to check if there is space for a heat pump?
+        # Do it manually???
+
+        # Doesn't make it
+        # misses = df[df["Predicted Post Works EPC"] == "C"]
+        # # 5 of them are flats and so are difficult to get to EPC B without renewables. Possibly not worth it from an
+        # # ROI perspective
+        #
+        # misses[["UPRN", "Address", "Postcode", "property_type"]]
+
+        #              UPRN                               Address  Postcode property_type
+        # 2    100120988937              13 Sidbury Circular Road   SP9 7HX          Flat  No further action
+        # 3    100120988998              74 Sidbury Circular Road   SP9 7JA          Flat  No further action
+        # 4    100120989416                       47 Zouch Avenue   SP9 7LR          Flat  No further action
+        # 6    100060585002  42, Muscott Close, Shipton Bellinger   SP9 7TX         House  Can probably take a heat pump
+        # 37    10000801072        34 Luffenham Place, Chicksands  SG17 5XH         House  Already surveyed as having
+        # an ASHP - should be looked at
+        # 121  100120988259                      8, Karachi Close   SP9 7LW          Flat
+        # 122  100121101217                     599, Pepper Place  BA12 0DW          Flat
+        # 140  100021455241         33 Blenheim Crescent, Ruislip   HA4 7HA         House - Solar isnt recommended
+        # due to bug
+        # 149  100120915656            10 Bower Green, Shrivenham   SN6 8TU         House - Solar isn't recommended
+        # due to bug
+
+        scenario_data[scenario] = df
+
+    printing_scenario_id = scenario_ids[0]
+    # EPC breakdown
+    print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts())
+    # Cost
+    # Total cost
+    print(scenario_data[printing_scenario_id]["total_cost"].sum())
+    # Base cost
+    print(scenario_data[printing_scenario_id]["estimated_cost"].sum())
+    # Contingency
+    print(scenario_data[printing_scenario_id]["contingency"].sum())
+    # Costs averaged per unit
+    print(scenario_data[printing_scenario_id]["total_cost"].mean())
+    print(scenario_data[printing_scenario_id]["estimated_cost"].mean())
+    print(scenario_data[printing_scenario_id]["contingency"].mean())
+
+    # Average relative savings
+    print(scenario_data[printing_scenario_id]["relative_carbon_savings"].mean())
+    print(scenario_data[printing_scenario_id]["relative_kwh_savings"].mean())
+    print(scenario_data[printing_scenario_id]["relative_bill_savings"].mean())
+
+    measure_details = {}
+    for scenario in scenario_ids:
+        measure_details[scenario] = {}
+        recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
+        measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict()
+        # Get average cost per measure
+        measure_columns = [
+            c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c
+        ]
+        # Take the mean, drop zero columns
+        measure_costs = {}
+        for m in measure_columns:
+            measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean())
+        measure_details[scenario]["cost_per_measure"] = measure_costs
+
+    pprint(measure_details[scenario_ids[0]]["count"])
+    pprint(measure_details[scenario_ids[1]]["count"])
+
+    # Cost per measures
+    pprint(measure_details[scenario_ids[0]]["cost_per_measure"])
+    pprint(measure_details[scenario_ids[1]]["cost_per_measure"])
+
+    # Do not get to EPC B:
+    # 5 are flats
+    # 1) 34 Luffenham Place, Chicksands SG17 5XH, has been surveyed as having a low performing heat pump -
+    # should be looked at but several surrounding properties have been surveyed in a similar fashion
+    # 2) 42, Muscott Close, Shipton Bellinger SP9 7TX, has an oil boiler and the bills go up recommending HHRSH.
+    #    we could non-intrusively recommend a heat pump.
+    # 3) 33 Blenheim Crescent, Ruislip, HA4 7HA, 100021455241 Solar potential modelling returned nothing -
+    # manual review indicates that there are multiple trees surrounding the south-facing side of the property
+    # 4) 10 Bower Green, Shrivenham, SN6 8TU - Solar isn't recommended without further survey due to the local
+    # area being surrounded by trees
+
+    # Scenario adjustments:
+    # Exclude: boiler_upgrade
+    # Make ASHP COP 3.5
+
+    # Metrics we need by scenario:
+    # Cost
+    # contingency
+    # Carbon
+    # kwh
+    # bill savings
+    scenario_metrics = {}
+    for scenario in scenario_ids:
+        df = scenario_data[scenario].copy()
+
+        avg_savings = df[
+            ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
+             "total_cost", "contingency"]
+        ].mean().to_dict()
+        avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"]
+        avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
+        scenario_metrics[scenario] = avg_savings
+
+    pprint(scenario_metrics[scenario_ids[0]])
+    pprint(scenario_metrics[scenario_ids[1]])
+
+    scenario_data[scenario_ids[0]]["loft_insulation"][
+        scenario_data[scenario_ids[0]]["loft_insulation"] > 0
+        ].mean()
+
+    scenario_data[scenario_ids[0]]["cavity_wall_insulation"][
+        scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0
+        ].mean()
+
+    # Testing: checking flood risk
+
+    import requests
+
+    def get_flood_risk(lat, lon, radius_km=1):
+        url = "https://environment.data.gov.uk/flood-monitoring/id/floods"
+        params = {
+            'lat': lat,
+            'long': lon,
+            'dist': radius_km  # search radius in km
+        }
+
+        response = requests.get(url, params=params)
+        response.raise_for_status()
+        data = response.json()
+
+        flood_warnings = data.get("items", [])
+
+        if not flood_warnings:
+            print("No active flood warnings near this location.")
+        else:
+            print(f"{len(flood_warnings)} warning(s) found near the location:")
+            for warning in flood_warnings:
+                print(f"- Area: {warning.get('description')}")
+                print(f"  Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})")
+                print(f"  Message changed at: {warning.get('timeMessageChanged')}")
+                print()
+
+        return flood_warnings
+
+    from shapely.geometry import shape, Point
+    def get_flood_areas_near_point(lat, lon, radius_km=2):
+        url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas"
+        params = {
+            'lat': lat,
+            'long': lon,
+            'dist': radius_km
+        }
+
+        response = requests.get(url, params=params)
+        response.raise_for_status()
+        return response.json().get("items", [])
+
+    def point_in_flood_area(lat, lon):
+        flood_areas = get_flood_areas_near_point(lat, lon, radius_km=1)
+        point = Point(lon, lat)  # GeoJSON uses (lon, lat) format
+
+        for area in flood_areas:
+            polygon_url = area.get("polygon")
+            if not polygon_url:
+                continue
+
+            polygon_response = requests.get(polygon_url)
+            polygon_response.raise_for_status()
+            polygon_geojson = polygon_response.json()
+
+            features = polygon_geojson.get("features", [])
+            if not features:
+                continue
+
+            flood_polygon = shape(features[0]['geometry'])
+
+            try:
+                is_inside = flood_polygon.contains(point)
+            except:
+                is_inside = False
+
+            if is_inside:
+                print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})")
+                return area
+
+    from tqdm import tqdm
+    floor_warnings_data = []
+    for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)):
+        # warnings = floor_warnings_data.extend(
+        #     get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1)
+        # )
+
+        resp = point_in_flood_area(lat=property["LATITUDE"], lon=property["LONGITUDE"])
+        if resp:
+            floor_warnings_data.append(
+                {
+                    "uprn": property["uprn"],
+                    "address": property["address"],
+                    "postcode": property["postcode"],
+                    "area": resp
+                }
+            )
+            continue
+
+    import plotly.graph_objects as go
+
+    labels = [
+        "House_Cavity_Insulated_Pitched roof_Pre 1970",
+        "House_Cavity_Insulated_Pitched roof_Post 1970",
+        "House_Cavity_Uninsulated_Pitched roof_Pre 1970",
+        "House_Cavity_Uninsulated_Pitched roof_Post 1970",
+        "other",
+        "House_System_Uninsulated_Pitched roof_Pre 1970",
+        "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970"
+    ]
+
+    values = [62, 36, 21, 16, 16, 4, 2]
+
+    hovertext = [
+        "Loft insulation, draft proofing",
+        "Top-up loft insulation",
+        "Cavity wall insulation, loft insulation",
+        "Cavity wall insulation, ventilation",
+        "Bespoke retrofit measures",
+        "External wall insulation, roof insulation",
+        "Flat roof insulation, internal wall insulation"
+    ]
+
+    fig = go.Figure(go.Treemap(
+        labels=labels,
+        parents=[""] * len(labels),  # No root
+        values=values,
+        hovertext=hovertext,
+        hoverinfo="text",
+        textinfo="none",
+        marker=dict(
+            line=dict(color="white", width=4),
+            colors=values,
+            colorscale="Blues"
+        )
+    ))
+
+    fig.update_layout(
+        margin=dict(t=10, l=10, r=10, b=10),
+        plot_bgcolor="white",
+        paper_bgcolor="white"
+    )
+
+    fig.show()
+
+    # Get the recommended measures by scenario id
+    recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c]
+    measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[
+        recommendation_cols
+    ].sum().reset_index()
+
+    measure_counts_by_scenario.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv"
+    )
+
+    # Estimate average valuation improvement by scenario
+    valuation_data = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/property_valuation.csv"
+    )
+
+    from backend.ml_models.Valuation import PropertyValuation
+
+    uplift = []
+    for _, x in valuation_data.iterrows():
+        uprn = x["uprn"]
+
+        to_append = {"uprn": uprn}
+        for _id in scenario_ids:
+            scenario = scenario_data[_id][
+                scenario_data[_id]["uprn"] == uprn
+                ].squeeze()
+
+            val = PropertyValuation.estimate_valuation_improvement(
+                current_value=x["valuation"],
+                current_epc=scenario["Current EPC Rating"].value,
+                target_epc=scenario["Predicted Post Works EPC"],
+                total_cost=None
+            )
+
+            to_append[_id] = val["average_increase"]
+
+        uplift.append(to_append)
+
+    uplift = pd.DataFrame(uplift)
+    print(uplift[scenario_ids[0]].mean())
+    # £8,161
+    print(uplift[scenario_ids[1]].mean())
+    # £16,938
--- a/etl/customers/mod/pilot/3.
+++ b/etl/customers/mod/pilot/3.
@ -0,0 +1,76 @@
+import pandas as pd
+
+# Get the wave 2 costing data and produce some breakdowns
+costs = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/Measure cost study for MOD.xlsx",
+    header=2
+)
+
+# Get the EPC data for these
+
+
+# Cavity
+cwi_costs = costs[
+    ['Model', 'Total invoiced (including VAT)']
+].copy()
+cwi_costs["Model"] = "CWI - " + cwi_costs["Model"]
+cwi_costs = cwi_costs[~pd.isnull(cwi_costs["Total invoiced (including VAT)"])]
+
+# Loft
+li_costs = costs[
+    ['Model.2', 'Total invoiced (including VAT).2']
+].copy()
+li_costs["Model.2"] = "LI - " + li_costs["Model.2"]
+li_costs = li_costs[~pd.isnull(li_costs["Total invoiced (including VAT).2"])]
+# Rename
+li_costs.columns = ["Model", "Total invoiced (including VAT)"]
+
+# Windows
+windows_costs = costs[
+    ['Model.3', 'Total invoiced (including VAT).3']
+].copy()
+windows_costs["Model.3"] = "Windows - " + windows_costs["Model.3"]
+windows_costs = windows_costs[~pd.isnull(windows_costs["Total invoiced (including VAT).3"])]
+# Rename
+windows_costs.columns = ["Model", "Total invoiced (including VAT)"]
+
+# Doors
+doors_costs = costs[
+    ['Model.4', 'Total invoiced (including VAT).4']
+].copy()
+doors_costs["Model.4"] = "Doors - " + doors_costs["Model.4"]
+doors_costs = doors_costs[~pd.isnull(doors_costs["Total invoiced (including VAT).4"])]
+# Rename
+doors_costs.columns = ["Model", "Total invoiced (including VAT)"]
+
+# ASHP
+ashps_costs = costs[
+    ['Model.5', 'Total invoiced (including VAT).5']
+].copy()
+ashps_costs["Model.5"] = "ASHP - " + ashps_costs["Model.5"]
+ashps_costs = ashps_costs[~pd.isnull(ashps_costs["Total invoiced (including VAT).5"])]
+# Rename
+ashps_costs.columns = ["Model", "Total invoiced (including VAT)"]
+
+# Solar
+solar_costs = costs[
+    ['Model.6', 'Total invoiced (including VAT).6']
+].copy()
+solar_costs["Model.6"] = "Solar - " + solar_costs["Model.6"]
+solar_costs = solar_costs[~pd.isnull(solar_costs["Total invoiced (including VAT).6"])]
+# Rename
+solar_costs.columns = ["Model", "Total invoiced (including VAT)"]
+
+fabric_costing_data = pd.concat([cwi_costs, li_costs])
+windows_doors_costing_data = pd.concat([windows_costs, doors_costs])
+
+windows_doors_costing_data.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/windows_doors_costs.csv"
+)
+fabric_costing_data.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/fabric_costing_data.csv"
+)
+ashps_costs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/ashps_costs.csv")
+solar_costs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/solar_costs.csv")
+
+project_cost_by_age = costs[["Property age ", "TOTAL Cost of Works"]].groupby("Property age ").mean().reset_index()
--- a/etl/customers/panacap/assets.py
+++ b/etl/customers/panacap/assets.py
@ -0,0 +1,61 @@
+import os
+
+import pandas as pd
+from dotenv import load_dotenv
+
+from etl.spatial.OpenUprnClient import OpenUprnClient
+from etl.route_march_data_pull.app import get_data
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+addresses = [
+    {"address": "3 Willis Road", "postcode": "CB1 2AQ"},
+    {"address": "22 Catharine Street", "postcode": "CB1 3AW"},
+    {"address": "332 Mill Road", "postcode": "CB1 3NN"},
+    {"address": "330 Mill Road", "postcode": "CB1 3NN"},
+    {"address": "328 Mill Road", "postcode": "CB1 3NN"},
+    {"address": "71 Mill Road", "postcode": "CB1 2AS"},
+    {"address": "78 Argyle Street", "postcode": "CB1 3LZ"},
+    {"address": "9 Graham Road", "postcode": "CB4 2ZE"},
+    {"address": "217 Mill Road", "postcode": "CB1 3BE"},
+    {"address": "374 Mill Road", "postcode": "CB1 3NN"},
+    {"address": "174 Thoday Street", "postcode": "CB1 3AX"},
+    {"address": "37 Abbey Road", "postcode": "CB5 8HH"},
+    {"address": "18 Upper Gwydir Street", "postcode": "CB1 2LR"},
+    {"address": "21 Fulbourn Road Fulbourn", "postcode": "CB1 9JL"},
+    {"address": "108 Argyle Street", "postcode": "CB1 3LS"},
+    {"address": "115 Victoria Road", "postcode": "CB4 3BS"},
+    {"address": "55 Ross Street", "postcode": "CB1 3BP"},
+    {"address": "16 Kingston Street", "postcode": "CB1 2NU"},
+    {"address": "13 Thoday Street", "postcode": "CB1 3AS"},
+    {"address": "103 York Street", "postcode": "CB1 2PZ"},
+]
+
+asset_list = pd.DataFrame(addresses)
+asset_list["row_id"] = asset_list.index
+
+epc_data, _, _ = get_data(
+    asset_list=asset_list, fulladdress_column="address", postcode_column="postcode", address1_column="address",
+    manual_uprn_map={}, epc_api_only=True
+)
+
+epc_df = pd.DataFrame(epc_data)
+epc_df.shape
+
+asset_list = asset_list.merge(
+    epc_df, how="left", on="row_id"
+)
+
+asset_list = asset_list.rename(columns={"address_x": "Address", "postcode_x": "Postcode"})
+asset_list["uprn"] = asset_list["uprn"].astype(str)
+
+spatial_data = OpenUprnClient.get_spatial_data([x["uprn"] for x in epc_data], bucket_name="retrofit-data-dev")
+spatial_data["UPRN"] = spatial_data["UPRN"].astype(str)
+
+asset_list = asset_list.merge(
+    spatial_data, how="left", left_on="uprn", right_on="UPRN"
+)
+
+asset_list.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Panacap/Acquisitions EPC Data.csv",
+                  index=False)
--- a/etl/customers/remote_assessments/app.py
+++ b/etl/customers/remote_assessments/app.py
@ -1,9 +1,15 @@
+import os
 import pandas as pd
+from dotenv import load_dotenv
 from utils.s3 import save_csv_to_s3
+from etl.find_my_epc.AssetListEpcData import AssetListEpcData

-PORTFOLIO_ID = 111
+PORTFOLIO_ID = 141
 USER_ID = 8

+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+

 def app():
    """
@ -13,10 +19,21 @@ def app():

    asset_list = [
        {
-            "uprn": 100050770761,
-            "address": "12 Sheardown Street",
-            "postcode": "DN4 0BH"
-        }
+            "address": "196 Merrow Street",
+            "postcode": "SE17 2NP",
+            "uprn": 200003423454,
+            "patch": True
+        },
+        {
+            "address": "65 Liverpool Grove",
+            "postcode": "SE17 2HP",
+            "uprn": 200003423194
+        },
+        {
+            "address": "2 Brettell Street",
+            "postcode": "SE17 2NZ",
+            "uprn": 200003423607
+        },
    ]
    asset_list = pd.DataFrame(asset_list)

@ -28,30 +45,46 @@ def app():
        file_name=filename
    )

-    non_invasive_recommendations = [
-        {
-            "uprn": 100050770761,
-            "recommendations": [
-                {
-                    "type": "extension_cavity_wall_insulation",
-                    "sap_points": 2,
-                }
-            ]
-        }
-    ]
+    # Pull the non-invasive recommendations automatically
+    asset_list_epc_client = AssetListEpcData(
+        asset_list=asset_list,
+        epc_auth_token=EPC_AUTH_TOKEN
+    )
+    asset_list_epc_client.get_data()
+    asset_list_epc_client.get_non_invasive_recommendations()
+    asset_list_epc_client.get_patch()
+
    # Store non-invasive recommendations in S3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
-        dataframe=pd.DataFrame(non_invasive_recommendations),
+        dataframe=pd.DataFrame(asset_list_epc_client.non_invasive_recommendations),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=non_invasive_recommendations_filename
    )

+    # Store patches in S3
+    patches_filename = ""
+    if asset_list_epc_client.patches:
+        patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv"
+        save_csv_to_s3(
+            dataframe=pd.DataFrame(asset_list_epc_client.patches),
+            bucket_name="retrofit-plan-inputs-dev",
+            file_name=patches_filename
+        )
+
    valuation_data = [
        {
-            "uprn": 100050770761,
-            "value": 67_000
-        }
+            "valuation": 339_000,
+            "uprn": 200003423454,
+        },
+        {
+            "valuation": 374_000,
+            "uprn": 200003423194
+        },
+        {
+            "valuation": 719_000,
+            "uprn": 200003423607
+        },
    ]
    # Store valuation data to s3
    valuation_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuation.csv"
@ -68,7 +101,7 @@ def app():
        "goal_value": "C",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
-        "patches_file_path": "",
+        "patches_file_path": patches_filename,
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "valuation_file_path": valuation_filename,
        "scenario_name": "Full package remote assessment",
--- a/etl/customers/settle/route_march_2024_11_08.py
+++ b/etl/customers/settle/route_march_2024_11_08.py
@ -0,0 +1,226 @@
+import os
+import time
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def get_data(asset_list):
+    """
+    Look up the newest EPC, and its recommendations, for every row of the asset list.
+
+    :param asset_list: DataFrame with "Postcode", "AddressLine1", "AddressLine4", "AddressLine5" and "row_id"
+        columns.
+    :return: Tuple (epc_data, errors). epc_data is a list of dicts, one per property for which an EPC was
+        found, holding the row_id, the fields of the newest EPC and a "recommendations" list. errors is the
+        list of row_ids whose lookup raised an exception. Properties with no EPC appear in neither list.
+    """
+    epc_data = []
+    errors = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        try:
+            postcode = home["Postcode"]
+            house_number = home["AddressLine1"]
+            # Skip blank address lines: joining a NaN cell raises a TypeError, which previously pushed every
+            # row with a missing address line into errors
+            address_parts = [home["AddressLine1"], home["AddressLine4"], home["AddressLine5"]]
+            full_address = ", ".join(str(part) for part in address_parts if not pd.isnull(part))
+
+            searcher = SearchEpc(
+                address1=str(house_number),
+                postcode=postcode,
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address,
+                max_retries=5
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                continue
+
+            # Look for EPC recommendations; a failed lookup is treated as "no recommendations".
+            # Catch Exception rather than using a bare except so Ctrl-C / SystemExit still stop the run.
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:
+                property_recommendations = {"rows": []}
+
+            epc = {
+                "row_id": home["row_id"],
+                **searcher.newest_epc.copy(),
+                "recommendations": property_recommendations["rows"]
+            }
+
+            epc_data.append(epc)
+        except Exception:
+            # Record the row so the caller can retry it, then pause before the next request
+            errors.append(home["row_id"])
+            time.sleep(5)
+
+    return epc_data, errors
+
+
+def app():
+    """
+    Pull EPC data for the properties in the Settle proposed programme spreadsheet and write the enriched
+    asset list to a local Excel file.
+
+    Data request contents:
+    Date of last EPC
+    Reason for EPC
+    SAP score on register
+    Property Type
+    Property Area
+    Property Age
+    Any Dimensions (HLP,PW,RH)
+    Property Wall Construction
+    Heating Type
+    Secondary Heating
+    Loft Insulation Depth
+
+    Additional if possible:
+    Heat loss calculations
+    EPC recommendations
+    Property UPRN
+
+    """
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/SETTLE FULL PROPOSED PROGRAMME.xlsx",
+        header=0
+    )
+    # row_id is the join key between the asset list and everything derived from the EPC lookups
+    asset_list["row_id"] = asset_list.index
+
+    epc_data, errors = get_data(asset_list)
+
+    # We now retry any failed properties once; rows that fail a second time are left without EPC data
+    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
+    epc_data_failed, _ = get_data(asset_list_failed)
+
+    # Append the retried data to the main data
+    epc_data.extend(epc_data_failed)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # We expand out the recommendations into one boolean column per distinct recommendation text
+    recommendations_df = epc_df[["row_id", "recommendations"]]
+
+    unique_recommendations = set()
+    for _, row in recommendations_df.iterrows():
+        unique_recommendations.update(rec["improvement-summary-text"] for rec in row["recommendations"])
+
+    columns = ["row_id"] + list(unique_recommendations)
+    transformed_data = []
+    for _, row in recommendations_df.iterrows():
+        # Start with False for every recommendation, then flag the ones present on this property
+        row_data = dict.fromkeys(columns, False)
+        row_data["row_id"] = row["row_id"]
+        for rec in row["recommendations"]:
+            row_data[rec["improvement-summary-text"]] = True
+        transformed_data.append(row_data)
+
+    transformed_df = pd.DataFrame(transformed_data)
+    # Drop the column for blank recommendation text. errors="ignore" because that column only exists when at
+    # least one recommendation had an empty summary; without it the drop raises a KeyError.
+    transformed_df = transformed_df.drop(columns=[""], errors="ignore")
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "row_id",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description",
+            #
+            "energy-consumption-current",  # kwh/m2
+        ]
+    ]
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        on="row_id"
+    ).merge(
+        transformed_df,
+        how="left",
+        on="row_id"
+    )
+
+    asset_list = asset_list.drop(columns=["row_id"])
+
+    # Rename the columns to the customer-facing labels
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC",
+        "energy-consumption-current": "Heat Demand (kWh/m2)"
+    })
+
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
+            x["Property Type"]) else None, axis=1
+    )
+
+    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
+    # Blank strings become NaN. to_numeric is used instead of replace("", None), whose meaning depends on the
+    # pandas version (older releases treat a None value as "fill from the previous row").
+    asset_list["Number of Habitable Rooms"] = pd.to_numeric(
+        asset_list["Number of Habitable Rooms"], errors="coerce"
+    ).astype(float)
+
+    # Floor area and room count are split evenly across the estimated number of floors
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+
+    # The value comes from estimate_external_wall_area; the column label is kept as delivered to the customer
+    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            # Fall back to 2.5 when the height is missing. NaN is truthy, so it needs an explicit null
+            # check or it would be passed through as the floor height.
+            floor_height=(
+                2.5 if pd.isnull(x["Property Floor Height"]) or not x["Property Floor Height"]
+                else float(x["Property Floor Height"])
+            ),
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ),
+        axis=1
+    )
+
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
+            x["Roof Construction"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/Settle EPC Data pull - 08 Nov 2024.xlsx"
+    asset_list.to_excel(filename, index=False)
--- a/etl/customers/southend/epc_data_pull_2024_11_14.py
+++ b/etl/customers/southend/epc_data_pull_2024_11_14.py
@ -0,0 +1,231 @@
+import os
+import time
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def get_data(asset_list):
+    """
+    Look up the newest EPC, and its recommendations, for every row of the asset list.
+
+    :param asset_list: DataFrame with "Postcode", "address1", "Address" and "row_id" columns.
+    :return: Tuple (epc_data, errors). epc_data is a list of dicts, one per property for which an EPC was
+        found, holding the row_id, the fields of the newest EPC and a "recommendations" list. errors is the
+        list of row_ids whose lookup raised an exception. Properties with no EPC appear in neither list.
+    """
+    epc_data = []
+    errors = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        try:
+            postcode = home["Postcode"]
+            # Only the text before the first comma is used as the first address line
+            address1 = home["address1"].split(",")[0]
+            full_address = home["Address"]
+
+            searcher = SearchEpc(
+                address1=str(address1),
+                postcode=postcode,
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address,
+                max_retries=5
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                continue
+
+            # Look for EPC recommendations; a failed lookup is treated as "no recommendations".
+            # Catch Exception rather than using a bare except so Ctrl-C / SystemExit still stop the run.
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:
+                property_recommendations = {"rows": []}
+
+            epc = {
+                "row_id": home["row_id"],
+                **searcher.newest_epc.copy(),
+                "recommendations": property_recommendations["rows"]
+            }
+
+            epc_data.append(epc)
+        except Exception:
+            # Record the row so the caller can retry it, then pause before the next request
+            errors.append(home["row_id"])
+            time.sleep(5)
+
+    return epc_data, errors
+
+
+def app():
+    """
+    Pull EPC data for the properties on the "Planned RM" sheet of the Southend planned programme spreadsheet
+    and write the enriched asset list to a local Excel file.
+
+    Data request contents:
+    Date of last EPC
+    Reason for EPC
+    SAP score on register
+    Property Type
+    Property Area
+    Property Age
+    Any Dimensions (HLP,PW,RH)
+    Property Wall Construction
+    Heating Type
+    Secondary Heating
+    Loft Insulation Depth
+
+    Additional if possible:
+    Heat loss calculations
+    EPC recommendations
+    Property UPRN
+
+    """
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/Southend Planned programme.xlsx",
+        header=0,
+        sheet_name="Planned RM"
+    )
+    # row_id is the join key between the asset list and everything derived from the EPC lookups
+    asset_list["row_id"] = asset_list.index
+    # The first address line is the text before the first comma of the full address
+    asset_list["address1"] = asset_list["Address"].str.split(",").str[0]
+
+    epc_data, errors = get_data(asset_list)
+
+    # We now retry any failed properties once; rows that fail a second time are left without EPC data
+    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
+    epc_data_failed, _ = get_data(asset_list_failed)
+
+    # Append the retried data to the main data
+    epc_data.extend(epc_data_failed)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # We expand out the recommendations into one boolean column per distinct recommendation text
+    recommendations_df = epc_df[["row_id", "recommendations"]]
+
+    unique_recommendations = set()
+    for _, row in recommendations_df.iterrows():
+        unique_recommendations.update(rec["improvement-summary-text"] for rec in row["recommendations"])
+
+    columns = ["row_id"] + list(unique_recommendations)
+    transformed_data = []
+    for _, row in recommendations_df.iterrows():
+        # Start with False for every recommendation, then flag the ones present on this property
+        row_data = dict.fromkeys(columns, False)
+        row_data["row_id"] = row["row_id"]
+        for rec in row["recommendations"]:
+            row_data[rec["improvement-summary-text"]] = True
+        transformed_data.append(row_data)
+
+    transformed_df = pd.DataFrame(transformed_data)
+    # Drop the column for blank recommendation text. errors="ignore" because that column only exists when at
+    # least one recommendation had an empty summary; without it the drop raises a KeyError.
+    transformed_df = transformed_df.drop(columns=[""], errors="ignore")
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "row_id",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description",
+            #
+            "energy-consumption-current",  # kwh/m2
+            "photo-supply",
+        ]
+    ]
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        on="row_id"
+    ).merge(
+        transformed_df,
+        how="left",
+        on="row_id"
+    )
+
+    asset_list = asset_list.drop(columns=["row_id"])
+
+    # Rename the columns to the customer-facing labels
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC",
+        "energy-consumption-current": "Heat Demand (kWh/m2)",
+        "photo-supply": "% of the Roof with PV"
+    })
+
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
+            x["Property Type"]) else None, axis=1
+    )
+
+    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
+    # Blank strings become NaN. to_numeric is used instead of replace("", None), whose meaning depends on the
+    # pandas version (older releases treat a None value as "fill from the previous row").
+    asset_list["Number of Habitable Rooms"] = pd.to_numeric(
+        asset_list["Number of Habitable Rooms"], errors="coerce"
+    ).astype(float)
+
+    # Floor area and room count are split evenly across the estimated number of floors
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+
+    # The value comes from estimate_external_wall_area; the column label is kept as delivered to the customer
+    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            # Fall back to 2.5 when the height is missing. NaN is truthy, so it needs an explicit null
+            # check or it would be passed through as the floor height.
+            floor_height=(
+                2.5 if pd.isnull(x["Property Floor Height"]) or not x["Property Floor Height"]
+                else float(x["Property Floor Height"])
+            ),
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ),
+        axis=1
+    )
+
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
+            x["Roof Construction"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/southend EPC Data pull - 14 Nov "
+                "2024.xlsx")
+    asset_list.to_excel(filename, index=False)
--- a/etl/customers/stonewater/Wave
+++ b/etl/customers/stonewater/Wave
--- a/etl/customers/stonewater/data_cleaning.py
+++ b/etl/customers/stonewater/data_cleaning.py
@ -0,0 +1,155 @@
+import os
+import shutil
+from tqdm import tqdm
+from etl.access_reporting.app import SharePointClient
+
+
+def delete_large_files(
+        folder_path="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys"
+):
+    """
+    This function deletes photos, designs and other files which we don't need.
+
+    The expected layout is <folder_path>/<sub-folder>/<property folder>/<numbered folders>. For each property
+    folder the listed photo/design/installer folders are removed entirely, and the coordinator info folder is
+    stripped of .MOV and .jpg files and of its "Property Pics" folder. Deletion is permanent.
+
+    :param folder_path: Root folder containing the sub-folders of property folders. Defaults to the local
+        Wave 2.1 survey download.
+    :return: None
+    """
+    # Folder names (including the spelling variants seen on disk) that are removed with all their contents
+    folders_to_delete = [
+        "1. RA Property Pics", "4. Air Tightness Tests", "5. RD Design Info",
+        "1. RA Property PIcs", "Post EPC Photos", "4. RD Design Info",
+        "5. Installer Info", "6. Trustmark lodgement", "7.Post Install Inspection Photos",
+        "6. Trustmark Lodgement", "7. Post Inspection Photos",
+    ]
+
+    # Each sub-folder of the root holds the property folders
+    for subfolder in os.listdir(folder_path):
+        subfolder_path = os.path.join(folder_path, subfolder)
+        if not os.path.isdir(subfolder_path):
+            continue
+
+        for property_name in tqdm(os.listdir(subfolder_path)):
+            property_path = os.path.join(subfolder_path, property_name)
+            if not os.path.isdir(property_path):
+                continue
+
+            property_contents = os.listdir(property_path)
+            for folder_to_delete in folders_to_delete:
+                if folder_to_delete not in property_contents:
+                    continue
+                folder_to_delete_path = os.path.join(property_path, folder_to_delete)
+                if os.path.isdir(folder_to_delete_path):
+                    # Delete the folder, even if it's not empty
+                    shutil.rmtree(folder_to_delete_path)
+
+            # The coordinator info folder is numbered "2." where present, otherwise "1." is assumed
+            if "2. RA Coordinator Info" in property_contents:
+                coordinator_folder = "2. RA Coordinator Info"
+            else:
+                coordinator_folder = "1. RA Coordinator Info"
+            coordinator_info_path = os.path.join(property_path, coordinator_folder)
+            # A property with neither coordinator folder is skipped rather than aborting the whole run
+            if not os.path.isdir(coordinator_info_path):
+                continue
+
+            coordinator_info_contents = os.listdir(coordinator_info_path)
+            # Remove .MOV and .jpg files (the match is case-sensitive)
+            for file_name in coordinator_info_contents:
+                if file_name.endswith((".MOV", ".jpg")):
+                    os.remove(os.path.join(coordinator_info_path, file_name))
+
+            if "Property Pics" in coordinator_info_contents:
+                # Delete folder and contents
+                shutil.rmtree(os.path.join(coordinator_info_path, "Property Pics"))
+
+
+def download_data_from_sharepoint():
+    """
+    Download the retrofit assessment and mid-term folders for the Stonewater properties from SharePoint to
+    the local machine.
+
+    SharePoint credentials and the site id are read from the SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
+    SHAREPOINT_TENANT_ID and OSMOSIS_SHAREPOINT_SITE_ID environment variables. Only the region folders named
+    in folders_to_keep are visited; .MOV and .jpg files are excluded from the download.
+
+    :return: None
+    """
+    # Remote root that holds one folder per region, and the local folder the data is mirrored into
+    remote_root = "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"
+    local_root = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys - 2"
+    # A sub-folder is downloaded when its lower-cased name contains any of these
+    wanted_name_fragments = ("ra coordinator info", "retrofit assessment", "ra info", "mtp", "mid-term")
+
+    sharepoint_client = SharePointClient(
+        tenant_id=os.getenv("SHAREPOINT_TENANT_ID"),
+        client_id=os.getenv("SHAREPOINT_CLIENT_ID"),
+        client_secret=os.getenv("SHAREPOINT_CLIENT_SECRET"),
+        site_id=os.getenv("OSMOSIS_SHAREPOINT_SITE_ID")
+    )
+    drive_id = sharepoint_client.document_drive["id"]
+
+    # List the region folders on SharePoint
+    contents = sharepoint_client.list_folder_contents(
+        drive_id=drive_id,
+        folder_path=remote_root
+    )
+
+    folders_to_keep = [
+        "1. Herefordshire", "2. Bedfordshire", "3. Wiltshire", "4. Bournemouth",
+        "5. Coventry", "6. West Sussex", "7. Dorset", "8. Cambridgeshire",
+        "9. Guildford", "10. Little Island", "11. CCS Dorset",
+    ]
+
+    folders_to_pull = [
+        folder for folder in contents["value"] if folder["name"] in folders_to_keep
+    ]
+    for folder_to_pull in folders_to_pull:
+        # SharePoint paths always use "/" as the separator, so they are built with str.join, not os.path.join
+        region_path = "/".join([remote_root, folder_to_pull["name"]])
+
+        # Get the property folders of this region
+        folder_contents = sharepoint_client.list_folder_contents(
+            drive_id=drive_id,
+            folder_path=region_path,
+            page_size=100
+        )
+
+        # A region without a "value" listing is treated as empty, matching the property-level check below
+        for property_folder in folder_contents.get("value") or []:
+            # We go into each property folder and get the contents
+            property_path = "/".join([region_path, property_folder["name"]])
+            property_folder_contents = sharepoint_client.list_folder_contents(
+                drive_id=drive_id,
+                folder_path=property_path
+            )
+            if not property_folder_contents.get("value"):
+                continue
+
+            # We look for the retrofit assessment folder or mtp folders
+            property_sub_folders = [
+                f for f in property_folder_contents["value"]
+                if any(fragment in f["name"].lower() for fragment in wanted_name_fragments)
+            ]
+
+            for property_sub_folder in property_sub_folders:
+                # Mirror the remote region/property/sub-folder structure under the local root
+                download_dir = os.path.join(
+                    local_root,
+                    folder_to_pull["name"],
+                    property_folder["name"],
+                    property_sub_folder["name"]
+                )
+
+                # We download the folder
+                sharepoint_client.download_sharepoint_folder(
+                    drive_id=drive_id,
+                    folder_path="/".join([property_path, property_sub_folder["name"]]),
+                    download_dir=download_dir,
+                    excluded_file_types=["MOV", "jpg"]
+                )
--- a/etl/customers/stonewater/potential_eco_properties.py
+++ b/etl/customers/stonewater/potential_eco_properties.py
@ -0,0 +1,542 @@
+import os
+import time
+import json
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from utils.s3 import read_from_s3, read_pickle_from_s3
+import msoffcrypto
+from io import BytesIO
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def get_data(asset_list):
+    """
+    Look up the newest EPC, and its recommendations, for every row of the asset list.
+
+    :param asset_list: DataFrame providing the "Postcode", "Number", "Full Address" and "row_id" columns.
+    :return: Tuple ``(epc_data, errors)``. ``epc_data`` is a list of dicts holding the row_id, the fields of
+        the newest EPC and a "recommendations" list; ``errors`` is the list of row_ids whose lookup raised.
+    """
+    epc_data = []
+    errors = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        try:
+            postcode = home["Postcode"]
+            house_number = home["Number"]
+            full_address = home["Full Address"]
+
+            searcher = SearchEpc(
+                address1=str(house_number),
+                postcode=postcode,
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address,
+                max_retries=5
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                # No EPC found: the row is neither a result nor an error
+                continue
+
+            # Look for EPC recommendations. A failure here is tolerated (best effort), but only for ordinary
+            # exceptions, so that Ctrl-C / SystemExit still stop the run.
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:
+                property_recommendations = {"rows": []}
+
+            epc = {
+                "row_id": home["row_id"],
+                **searcher.newest_epc.copy(),
+                "recommendations": property_recommendations["rows"]
+            }
+
+            epc_data.append(epc)
+        except Exception:
+            # Record the failed row and pause before the next lookup
+            # (presumably to back off the EPC API - TODO confirm)
+            errors.append(home["row_id"])
+            time.sleep(5)
+
+    return epc_data, errors
+
+
+def app():
+    """
+    This code creates a list of cavity properties, for review
+    """
+    # Overview: reads the password-protected ECO master sheet, the archetyped property list, the costed
+    # retrofit packages and the Parity master sheet from local disk, flags cavity / CWI / ECO3 properties,
+    # and writes the resulting priority list for non-intrusive surveys to a local CSV.
+
+    # Read in the password protected master
+    # TODO: This file should be deleted!
+
+    # Path to the password-protected Excel file
+    file_path = ("/Users/khalimconn-kowlessar/Downloads/STONEWATER MASTER SHEET - UPDATED 20.5.24 - K- PASSWORD "
+                 "PROTECTED.xlsx")
+    # NOTE(review): the password is hard-coded in source control; consider loading it from the environment
+    password = "STONE123"  # Replace with the actual password
+
+    # Open the file and decrypt it
+    with open(file_path, "rb") as f:
+        decrypted_file = BytesIO()
+        office_file = msoffcrypto.OfficeFile(f)
+        office_file.load_key(password=password)
+        office_file.decrypt(decrypted_file)
+
+    # Read the decrypted file into a DataFrame
+    eco_rolling_master = pd.read_excel(decrypted_file, sheet_name="Sheet1", engine="openpyxl")
+
+    # Drop the cancelled installs
+    # NOTE(review): str.contains returns NaN for non-string cells, which `~` cannot negate - confirm the column
+    # only ever holds strings
+    eco_rolling_master = eco_rolling_master[
+        ~eco_rolling_master['INSTALL/CANCELLATION DATE'].str.contains("CANCELLED")
+    ]
+
+    archetyped_properties = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - "
+        "Archetyped V3.1.xlsx",
+        header=4
+    )
+
+    # Wall type descriptions that are treated as "as built" cavity walls
+    cavity_descriptions = [
+        "Cavity: AsBuilt (1983-1995)",
+        "Cavity: AsBuilt (Post 1995)",
+        "Cavity: AsBuilt (Pre 1976)",
+        "Cavity: AsBuilt (1976-1982)",
+    ]
+
+    archetyped_properties["Is Cavity Property"] = archetyped_properties["Wall Type"].isin(cavity_descriptions)
+    # We also identify any properties where properties were found to need cavity wall insulation
+
+    costed_packages = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater - Costed Retrofit Packages "
+        "20241030 (WIP) Single Model V2.xlsx",
+        sheet_name="Modelled Packages",
+        header=13
+    )
+
+    # Modelled packages whose main wall measure is CWI or a possible extract and re-fill
+    needs_cwi = costed_packages[
+        costed_packages["Main Wall Insulation"].isin(
+            [
+                "Poss Extract CWI & Refill (issues identified)",
+                "CWI RdSAP Default"
+            ]
+        )
+    ][["Address ID", "Address", "Current SAP Rating", "Current EPC Band", "Postcode", "Archetype ID",
+       "Main Wall Insulation",
+       "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"]]
+
+    # We flag these properties (every property sharing an archetype with a package that needs CWI)
+    archetyped_properties["Survey shows CWI needed for Archetype"] = archetyped_properties["Archetype ID"].isin(
+        needs_cwi["Archetype ID"]
+    )
+
+    # Drop rows without an Address ID and rows whose Address ID is the literal header text "Address ID"
+    archetyped_properties = archetyped_properties[~pd.isnull(archetyped_properties["Address ID"])]
+    archetyped_properties = archetyped_properties[archetyped_properties["Address ID"] != "Address ID"]
+
+    # this is the big list!!!
+    features = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
+        "master sheet.csv",
+        encoding='latin1'
+    )
+    features["Address ID"] = features["Address ID"].astype(str)
+
+    features_to_merge = features[
+        [
+            "Address ID", "Organisation Reference", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating",
+            "Main Fuel",
+            "Hot Water",
+            "Renewables", "Total Floor Area"
+        ]
+    ]
+
+    # Join the Parity features onto the archetyped properties
+    stonewater_cavity_properties = archetyped_properties[
+        ["Name", "Postcode", "Osm. ID", "Org. ref.", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no",
+         "Street name",
+         "Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"]
+    ].merge(
+        features_to_merge, how="left", on="Address ID"
+    )
+
+    # We filter this down to the properties that are cavity properties
+    stonewater_cavity_properties = stonewater_cavity_properties[
+        stonewater_cavity_properties["Is Cavity Property"] |
+        stonewater_cavity_properties["Survey shows CWI needed for Archetype"]
+        ]
+
+    # "Reason Included" is built up in passes; each later np.where overrides the earlier value where it applies
+    stonewater_cavity_properties["Reason Included"] = "As Built Cavity Property"
+    stonewater_cavity_properties["Reason Included"] = np.where(
+        stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
+        ~stonewater_cavity_properties["Is Cavity Property"],
+        "Survey revealed potential need for CWI or extract and re-fill",
+        stonewater_cavity_properties["Reason Included"]
+    )
+    stonewater_cavity_properties["Reason Included"] = np.where(
+        stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
+        stonewater_cavity_properties["Is Cavity Property"],
+        "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
+        stonewater_cavity_properties["Reason Included"]
+    )
+    # We indicate the exact properties that need CWI, based on survey findings
+    # (the package Address IDs are cast int -> str, presumably to match the string IDs held in this frame)
+    stonewater_cavity_properties["Reason Included"] = np.where(
+        stonewater_cavity_properties["Address ID"].isin(
+            needs_cwi[needs_cwi["Main Wall Insulation"] == "CWI RdSAP Default"]["Address ID"].astype(int).astype(
+                str).values
+        ),
+        "Survey showed this property needs CWI",
+        stonewater_cavity_properties["Reason Included"]
+    )
+
+    stonewater_cavity_properties["Reason Included"] = np.where(
+        stonewater_cavity_properties["Address ID"].isin(
+            needs_cwi[needs_cwi["Main Wall Insulation"] == "Poss Extract CWI & Refill (issues identified)"][
+                "Address ID"].astype(int).astype(str).values
+        ),
+        "Survey showed this property could need extract and re-fill",
+        stonewater_cavity_properties["Reason Included"]
+    )
+
+    # We flag units that were installed under ECO3
+    # Only master-sheet rows with a usable (non-null, on-asset-list) Stonewater UPRN are kept for matching
+    numeric_ids = eco_rolling_master[eco_rolling_master["STONEWATER UPRN"] != "NOT ON ASSET LIST"]
+    numeric_ids = numeric_ids[~pd.isnull(numeric_ids["STONEWATER UPRN"])]
+    numeric_ids["STONEWATER UPRN"] = numeric_ids["STONEWATER UPRN"].astype(int)
+
+    stonewater_cavity_properties["Installed under ECO3"] = stonewater_cavity_properties["Org. ref."].isin(
+        numeric_ids['STONEWATER UPRN'].values
+    )
+
+    # Which postcodes were installed under ECO3
+    priority_list_eco3 = stonewater_cavity_properties[
+        stonewater_cavity_properties["Installed under ECO3"]
+    ]["Postcode"].unique()
+
+    # These are properties that were not installed under ECO3, that have the same postcodes as properties
+    # installed under ECO3
+
+    # These are 66 properties we might want to start with as an immediate priority
+    stonewater_cavity_properties["Same Postcode as Installed under ECO3"] = (
+        ~stonewater_cavity_properties["Installed under ECO3"] & (
+        stonewater_cavity_properties["Postcode"].isin(priority_list_eco3)
+    )
+    )
+
+    stonewater_cavity_properties["UPRN"] = stonewater_cavity_properties["UPRN"].astype("Int64").astype(str)
+    # Find the postcodes where an Osmosis survey revealed a need for CWI
+    postcodes_found_needing_cwi = stonewater_cavity_properties[
+        stonewater_cavity_properties["Reason Included"].isin(
+            [
+                "Survey revealed potential need for CWI or extract and re-fill",
+                "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
+                "Survey showed this property needs CWI",
+                "Survey showed this property could need extract and re-fill"
+            ]
+        )
+    ]["Postcode"].unique()
+
+    # Properties in one of those postcodes whose own "Reason Included" is not one of the survey-based reasons
+    stonewater_cavity_properties["Suspected Needs CWI - not surveyed"] = (
+        (
+            stonewater_cavity_properties[
+                "Postcode"].isin(
+                postcodes_found_needing_cwi)
+        ) & (
+            ~stonewater_cavity_properties[
+                "Reason Included"].isin(
+                [
+                    "Survey revealed potential need "
+                    "for CWI or extract and re-fill",
+                    "Surveyed revealed potential "
+                    "need for CWI or extract and "
+                    "re-fill and is an as built "
+                    "cavity property",
+                    "Survey showed this property "
+                    "needs CWI",
+                    "Survey showed this property "
+                    "could need extract and re-fill"
+                ]
+            )
+        )
+    )
+
+    # Prefix the Parity-derived columns so their source is clear in the output
+    stonewater_cavity_properties = stonewater_cavity_properties.rename(
+        columns={
+            "Age": "Parity - Build Age",
+            "Property Type": "Parity - Property Type",
+            "Walls": "Parity - Wall Construction",
+            "Roofs": "Parity - Roof Construction",
+            "Glazing": "Parity - Glazing Type",
+            "Heating": "Parity - Heating Type",
+            "Main Fuel": "Parity - Main Fuel",
+            "Hot Water": "Parity - Hot Water",
+            "Renewables": "Parity - Renewables",
+            "Total Floor Area": "Parity - Total Floor Area"
+        }
+    )
+
+    # We now flag the additional properties in the as built list
+    # (rows of the Parity master sheet that are not in the archetyped list)
+
+    additional_properties = features[
+        ~features["Address ID"].isin(archetyped_properties["Address ID"].values)
+    ]
+
+    # Filter on as built cavity properties
+    additional_properties = additional_properties[
+        additional_properties["Walls"].isin(cavity_descriptions)
+    ]
+    additional_properties["Full Address"] = additional_properties["Address"].copy()
+    # Derive a house number per row from the first comma-separated part of the address
+    house_numbers = []
+    for _, x in tqdm(additional_properties.iterrows(), total=len(additional_properties)):
+        house_no = SearchEpc.get_house_number(x["Address"].split(",")[0], x["Postcode"])
+        if house_no is None:
+            house_no = x["Address"].split(",")[0]
+        # If we end up with a number like "01" we need to remove the leading zero
+        house_no = house_no.lstrip("0")
+        house_numbers.append(
+            {
+                "Address ID": x["Address ID"],
+                "Number": house_no
+            }
+        )
+
+    house_numbers = pd.DataFrame(house_numbers)
+    additional_properties = additional_properties.merge(house_numbers, how="left", on="Address ID")
+    additional_properties["row_id"] = additional_properties["Address ID"].copy()
+
+    # Flag any units in this list that were installed under ECO3
+    additional_properties["Installed under ECO3"] = additional_properties["Organisation Reference"].isin(
+        numeric_ids['STONEWATER UPRN'].values
+    )
+
+    # Additional list ECO3
+    additional_list_eco3 = additional_properties[additional_properties["Installed under ECO3"]]["Postcode"].unique()
+
+    # These are properties that were not installed under ECO3, that have the same postcodes as properties
+    # installed under ECO3
+    # These are 297 properties we might want to start with as an immediate priority
+    additional_properties["Same Postcode as Installed under ECO3"] = (
+        ~additional_properties["Installed under ECO3"] & (
+        additional_properties["Postcode"].isin(additional_list_eco3)
+    )
+    )
+
+    # We do some additional manual checks, for ECO3 properties that were installed that didn't get matched to either
+    # dataset
+    # NOTE(review): "In asset list" is only consumed by the commented-out manual checks below
+    numeric_ids["In asset list"] = numeric_ids["STONEWATER UPRN"].isin(
+        stonewater_cavity_properties['Org. ref.'].astype(int).values
+    )
+    numeric_ids["In asset list"] = numeric_ids["In asset list"] | (
+        numeric_ids["STONEWATER UPRN"].isin(
+            additional_properties['Organisation Reference'].astype(int).values
+        )
+    )
+
+    # eco3_installs_not_in_asset_list = numeric_ids[~numeric_ids["In asset list"]]
+    # # We now take samples of properties randomly and manually check the ID against the asset list
+    # print(eco3_installs_not_in_asset_list.sample(1)[["STONEWATER UPRN", "Post Code", "NO ", "Street / Block Name", ]])
+    # # Checked STONEWATER UPRN
+    # # 9862, BH15 1NR, 33, THE QUAY FOYER [x]
+    # # 12785, S01 66PN, 57, SEACOLE GARDENS [x]
+    # # 26071,  MK42 0TE,  51,  De Havilland Avenue, Shortstown [x]
+    # # 18213,  HR6 9UW, 20 Ford Street [x]
+    # # 24344, LU4 9FF, 6 SEAL CLOSE [x]
+    # # 31222,  SN14 0QZ, 7 HARDBROOK COURT [x]
+    # # 9343, SP4 7XL, 10 OAK PLACE [x]
+    # # 34730, LU5 5TN, 4 TUDOR DRIVE [x]
+    # # 7021,  BN27 2BZ, 32 BUTTS FIELD []
+    #
+    # stonewater_cavity_properties[stonewater_cavity_properties['Org. ref.'] == 7021]
+    # stonewater_cavity_properties[stonewater_cavity_properties['Postcode'] == "BN27 2BZ"]["Name"]
+    #
+    # additional_properties[additional_properties['Organisation Reference'] == 7021]
+    # additional_properties[additional_properties['Postcode'] == "BN27 2BZ"][["Address"]]
+
+    # Pull the EPCs for these properties
+    # additional_properties_epcs, errors = get_data(additional_properties)
+
+    # Save this data as a pickle
+    # import pickle
+    # with open("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/additional_properties_epcs.pkl",
+    # "wb") as f:
+    #     pickle.dump(additional_properties_epcs, f)
+
+    # Additional properties in a postcode where a survey found a CWI need, excluding ECO3 installs
+    additional_properties["Suspected Needs CWI - not surveyed"] = (
+        (
+            additional_properties["Postcode"].isin(postcodes_found_needing_cwi) &
+            ~additional_properties["Installed under ECO3"]
+        )
+    )
+
+    # We drop Full Address
+    additional_properties = additional_properties.drop(columns=["Full Address"])
+    # NOTE(review): "Suspected Needs CWI - not surveyed" is not in the selection below, so after the concat it is
+    # NaN for these rows and they never pass the `== True` filter further down - confirm this is intended
+    additional_properties2 = additional_properties[[
+        "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
+        "Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area", 'Installed under ECO3',
+        'Same Postcode as Installed under ECO3', "Organisation Reference",
+    ]].rename(
+        columns={
+            "Organisation Reference": "Org. ref.",
+            "SAP": "Parity - Predicted SAP",
+            "SAP Band": "Parity - Predicted SAP Band",
+            "Age": "Parity - Build Age",
+            "Property Type": "Parity - Property Type",
+            "Walls": "Parity - Wall Construction",
+            "Roofs": "Parity - Roof Construction",
+            "Glazing": "Parity - Glazing Type",
+            "Heating": "Parity - Heating Type",
+            "Main Fuel": "Parity - Main Fuel",
+            "Hot Water": "Parity - Hot Water",
+            "Renewables": "Parity - Renewables",
+            "Total Floor Area": "Parity - Total Floor Area"
+        }
+    )
+
+    # Combine the data:
+    # the Parity "Address" is joined onto the archetyped cavity properties, then both lists are stacked
+
+    stonewater_cavity_properties2 = stonewater_cavity_properties.merge(
+        features[["Address", "Organisation Reference"]], how="left", on="Organisation Reference"
+    )
+    full_dataset = pd.concat([stonewater_cavity_properties2, additional_properties2])
+    full_dataset = full_dataset.drop(columns=['Osm. ID'])
+
+    # We now define the priority list for non-intrusives
+    # "Postal Region" is the first two characters of the outward code; "Postal Region 2" is the full outward code
+    full_dataset["Postal Region"] = full_dataset["Postcode"].str.split(" ").str[0].str[0:2]
+    full_dataset["Postal Region 2"] = full_dataset["Postcode"].str.split(" ").str[0]
+
+    # Strip out anything we definitely don't want
+    full_dataset = full_dataset[~full_dataset["Installed under ECO3"]]
+
+    # Outward codes containing at least one property suspected of needing CWI
+    areas = full_dataset[full_dataset["Suspected Needs CWI - not surveyed"] == True]["Postal Region 2"].unique()
+
+    priorities = full_dataset[
+        full_dataset["Postal Region 2"].isin(areas)
+    ]
+
+    # Keep only the outward codes with more than 100 properties in the priority set
+    region_prevalance = priorities["Postal Region 2"].value_counts().to_frame().reset_index()
+    region_prevalance = region_prevalance[region_prevalance["count"] > 100]
+    df = priorities[priorities["Postal Region 2"].isin(region_prevalance["Postal Region 2"].values)]
+
+    # NOTE(review): the two results below are discarded; they appear to be left over from interactive use
+    df["Postal Region"].value_counts()
+    df["Postal Region 2"].value_counts()
+
+    # Sanity check: ECO3 installs were filtered out above, so none should remain
+    if df["Installed under ECO3"].sum():
+        raise ValueError("There are properties in the priority list that were installed under ECO3")
+
+    df.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Non-intrusives/10022025 Non-Intrusives - "
+        "revised list.csv",
+        index=False
+    )
+
+    # We save the data locally
+    # stonewater_cavity_properties.to_csv(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties - priority "
+    #     "postcodes.csv",
+    #     index=False
+    # )
+    # additional_properties2.to_csv(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties - "
+    #     "non-priority postcodes.csv",
+    #     index=False
+    # )
+    # # Save the survey findings
+    # needs_cwi.to_csv(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Properties Needing CWI -
+    #     WIP.csv",
+    #     index=False
+    # )
+
+
+def cross_reference_epc_programme():
+    """
+    Cross-reference the ECO3 "not completed" survey addresses against the Parity master sheet.
+
+    For every ECO3 fall-out address, master-sheet rows are looked up both on house number and by fuzzy
+    address similarity (fuzz.ratio above 90).
+
+    :return: List with one dict per fall-out row that has at least one fuzzy match, holding the fall-out
+        "address", the "house_number_matches" and the "fuzzy_matches" (both DataFrames of master-sheet rows).
+    """
+    # Kept as a function-level import, but hoisted out of the loop where it used to be re-executed per row
+    from fuzzywuzzy import fuzz
+
+    eco3_fallout = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/STONEWATER LIST OF ADDRESSES TO BE "
+        "SURVEYED - ECO3 NOT COMPLETED.xlsx"
+    )
+
+    def _house_number(address):
+        # Fall back to the first comma-separated part of the address when no house number is found
+        house_no = SearchEpc.get_house_number(address, "")
+        return address.split(",")[0] if house_no is None else house_no
+
+    # The fallback used to live in an iterrows() loop that assigned to the yielded row copies, so it never
+    # reached the DataFrame; it is now applied when the column is built.
+    eco3_fallout["house_number"] = eco3_fallout["ADDRESS"].apply(_house_number)
+
+    stonewater_modelled_above_c = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
+        "master sheet.csv",
+        encoding='latin1'
+    )
+
+    stonewater_modelled_above_c["house_number"] = stonewater_modelled_above_c.apply(
+        lambda x: SearchEpc.get_house_number(x["Address"], x["Postcode"]), axis=1
+    )
+
+    eco3_fallout_matched_to_above_c = []
+    for _, fallout_row in eco3_fallout.iterrows():
+        # Match on house number
+        house_number_matches = stonewater_modelled_above_c[
+            stonewater_modelled_above_c["house_number"] == fallout_row["house_number"]
+        ]
+
+        # We do a fuzzy match on the address, with Levenshtein distance
+        fallout_address = fallout_row["ADDRESS"]
+        fuzzy_matches = stonewater_modelled_above_c[
+            stonewater_modelled_above_c["Address"].apply(lambda x: fuzz.ratio(x, fallout_address) > 90)
+        ]
+
+        # NOTE(review): both match sets are kept; previously the fuzzy match overwrote the house-number match
+        # and nothing was stored - confirm which of the two should drive the final matching
+        if not fuzzy_matches.empty:
+            eco3_fallout_matched_to_above_c.append(
+                {
+                    "address": fallout_address,
+                    "house_number_matches": house_number_matches,
+                    "fuzzy_matches": fuzzy_matches,
+                }
+            )
+
+    return eco3_fallout_matched_to_above_c
+
+
+def finalise_list_for_non_intrusives():
+    """
+    Produce the final non-intrusive survey list and write it to Excel.
+
+    Rows installed under ECO3, or whose Address ID appears in the Osmosis modelled packages, are removed.
+    The organisation reference is joined on from the Parity master sheet, and only postcodes containing at
+    least one row flagged "Include in non-intrusives" are kept.
+    """
+    candidates = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Non-intrusives/20250207 Stonewater "
+        "Non-Intrusives.xlsx"
+    )
+    # Anything installed under ECO3 is out of scope
+    candidates = candidates[~candidates["Installed under ECO3"]]
+
+    # Mark the properties that already appear in the Osmosis modelled packages
+    osmosis_packages = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/Stonewater - Bid Packages WIP 14.11.20 V2 "
+        "(1).xlsx",
+        sheet_name="Modelled Packages",
+        header=13
+    )
+    surveyed_mask = candidates["Address ID"].isin(osmosis_packages["Address ID"].values)
+    candidates["Surveyed by Osmosis"] = surveyed_mask
+
+    # Removed 54 addresses
+    shortlist = candidates[~surveyed_mask]
+
+    # Add on the organisation reference from the Parity master sheet
+    parity_features = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
+        "master sheet.csv",
+        encoding='latin1'
+    )
+    organisation_refs = parity_features[["Organisation Reference", "Address ID"]]
+    shortlist = pd.merge(shortlist, organisation_refs, how="left", on="Address ID")
+
+    # First two characters of the outward code
+    outward_code = shortlist["Postcode"].str.split(" ").str[0]
+    shortlist["Postal Region"] = outward_code.str[0:2]
+
+    # Postcodes with at least one row selected for the non-intrusives
+    flagged_rows = shortlist[shortlist["Include in non-intrusives"]]
+    selected_regions = flagged_rows["Postcode"].unique()
+    shortlist["Is in region"] = shortlist["Postcode"].isin(selected_regions)
+
+    # Filter down:
+    shortlist = shortlist[shortlist["Is in region"]]
+
+    shortlist.to_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Non-intrusives/10022025 Non-Intrusives "
+        "List - final.xlsx")
--- a/etl/customers/stonewater/requirements/requirements-wave-3-prep.txt
+++ b/etl/customers/stonewater/requirements/requirements-wave-3-prep.txt
@ -0,0 +1,11 @@
+PyPDF2
+pandas
+tqdm
+openpyxl
+boto3
+epc-api-python==1.0.2
+usaddress==0.5.11
+fuzzywuzzy==0.18.0
+python-dotenv
+scipy
+
--- a/etl/customers/united
+++ b/etl/customers/united
@ -0,0 +1,73 @@
+import os
+import pandas as pd
+import numpy as np
+from asset_list.utils import get_data
+from backend.SearchEpc import SearchEpc
+from etl.spatial.OpenUprnClient import OpenUprnClient
+
+from dotenv import load_dotenv
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    Pull UPRNs and conservation / listed / heritage flags for the United Living GMCA property list.
+
+    Reads the property spreadsheet, looks the properties up via get_data (EPC API only), patches three
+    UPRNs by hand, joins the spatial data on UPRN and writes the result to a new Excel file.
+    """
+    filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/Potential GMCA props 05.03.xlsx"
+
+    df = pd.read_excel(filepath)
+    df["row_id"] = df.index
+
+    def _row_house_number(row):
+        return SearchEpc.get_house_number(row["Address"], row["Postcode"])
+
+    df["house_number"] = df.apply(_row_house_number, axis=1)
+
+    lookup_outputs = get_data(
+        df=df,
+        manual_uprn_map={},
+        epc_auth_token=EPC_AUTH_TOKEN,
+        uprn_column=None,
+        fulladdress_column="Address",
+        address1_column="house_number",
+        postcode_column="Postcode",
+        property_type_column=None,
+        built_form_column=None,
+        epc_api_only=True,
+        row_id_name="row_id",
+    )
+    # Three values are returned; the first holds the property records and the last is used as a collection
+    # of row ids below (presumably the rows with no data - verify against asset_list.utils.get_data)
+    properties_data, _second_output, last_output = lookup_outputs
+
+    no_data = df[df["row_id"].isin(last_output)]
+    no_data[["Address", "Postcode"]]
+
+    # 53  108 Alexandra Street  OL6 9QP 100011536830
+    # 56    301 Whiteacre Road  OL6 9QF 100011557437
+    # 65    97 Princess Street  OL6 9QJ 100011551813
+
+    found_uprns = pd.DataFrame(properties_data)[["uprn", "row_id"]]
+    data = df.merge(found_uprns, how="left", on="row_id")
+
+    # Fill missing UPRNS
+    manual_uprns = {
+        "108 Alexandra Street": 100011536830,
+        "301 Whiteacre Road": 100011557437,
+        "97 Princess Street": 100011551813,
+    }
+    for address, uprn in manual_uprns.items():
+        data["uprn"] = np.where(data["Address"] == address, uprn, data["uprn"])
+
+    # We now get whether the property is listed, heritage or in a conservation area
+    spatial_data = OpenUprnClient.get_spatial_data(
+        uprns=data["uprn"].tolist(),
+        bucket_name="retrofit-data-dev",
+    )
+    spatial_data = spatial_data.rename(columns={"UPRN": "uprn"})
+
+    data["uprn"] = data["uprn"].astype(int)
+
+    merged = data.merge(spatial_data, how="left", on="uprn")
+
+    # fill NAs
+    for flag_column in ('conservation_status', 'is_listed_building', 'is_heritage_building'):
+        merged[flag_column] = merged[flag_column].fillna(False)
+
+    merged.to_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/Potential GMCA props 05.03 - data "
+        "pulled.xlsx",
+        index=False
+    )
--- a/etl/customers/waltham_forest/whlg
+++ b/etl/customers/waltham_forest/whlg
@ -0,0 +1,85 @@
+"""
+This is the list of properties, based on the EPC data, that look eligible for WHLG
+"""
+import pandas as pd
+from etl.epc.settings import EARLIEST_EPC_DATE
+from etl.spatial.OpenUprnClient import OpenUprnClient
+
+# Load the Waltham Forest EPC certificates and normalise the column names (underscores -> hyphens, lower case)
+epc_data = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E09000031-Waltham-Forest/certificates.csv"
+)
+epc_data = epc_data.rename(columns=lambda c: c.replace("_", "-").lower())
+
+# Keep certificates lodged on/after the earliest date, with a UPRN
+epc_data = epc_data[epc_data["lodgement-date"] >= EARLIEST_EPC_DATE]
+epc_data = epc_data[epc_data["uprn"].notna()]
+epc_data["uprn"] = epc_data["uprn"].astype(int)
+
+# Keep EPC bands D-G in private rented or owner-occupied tenure
+eligible_ratings = ["D", "E", "F", "G"]
+eligible_tenures = ["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"]
+epc_data = epc_data[epc_data["current-energy-rating"].isin(eligible_ratings)]
+epc_data = epc_data[epc_data["tenure"].isin(eligible_tenures)]
+
+whlg_eligible_postcodes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes.xlsx",
+    sheet_name="Eligible postcodes",
+    header=1
+)
+# Format:
+whlg_eligible_postcodes = whlg_eligible_postcodes[['Postcode', 'Local Authority']]
+
+# Get the conservation / listed / heritage flags for every remaining UPRN
+uprns = epc_data["uprn"].unique()
+ca_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev")
+spatial_flags = ca_data[["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]]
+epc_data = epc_data.merge(
+    spatial_flags.rename(columns={"UPRN": "uprn"}),
+    how="left",
+    on="uprn",
+)
+
+# A property is restricted when any of the three flags is True (missing values count as not restricted)
+epc_data["has_conservation_restrictions"] = (
+    epc_data["conservation_status"].eq(True)
+    | epc_data["is_listed_building"].eq(True)
+    | epc_data["is_heritage_building"].eq(True)
+)
+
+whlg_eligible_postcodes["Local Authority"].value_counts()
+
+in_waltham_forest = whlg_eligible_postcodes["Local Authority"] == "Waltham Forest"
+whlg_eligible_postcodes = whlg_eligible_postcodes[in_waltham_forest]
+
+# Pathway 1:
+# Match based on eligible postcodes
+pathway1_columns = [
+    "uprn", "address", "address1", "postcode", "current-energy-rating", "current-energy-efficiency",
+    "lodgement-date",
+    "has_conservation_restrictions", "walls-description", "roof-description", "mainheat-description"
+]
+pathway1 = epc_data[epc_data["postcode"].isin(whlg_eligible_postcodes["Postcode"].values)]
+pathway1 = pathway1[pathway1_columns]
+
+pathway1 = pathway1.rename(
+    columns={
+        "current-energy-rating": "EPC Rating",
+        "current-energy-efficiency": "SAP Score",
+        "lodgement-date": "EPC Date",
+        "has_conservation_restrictions": "Conservation Area Restrictions",
+        "walls-description": "Wall Type",
+        "roof-description": "Roof Type",
+        "mainheat-description": "Main Heating",
+    }
+)
+
+# Normalise the EPC date to YYYY-MM-DD, then derive the year the EPC was lodged from it
+pathway1["EPC Date"] = pd.to_datetime(pathway1["EPC Date"]).dt.strftime("%Y-%m-%d")
+pathway1["EPC Year"] = pd.to_datetime(pathway1["EPC Date"]).dt.year
+
+low_epc = pathway1[pathway1["EPC Rating"].isin(["F", "G"])]
+low_epc["EPC Rating"].value_counts()
+low_epc.tail(1)[["address", "postcode"]]
+
+pathway1.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest WHLG - Pathway 1 Eligibility.csv",
+    index=False
+)
+
+# Pathway 2 or 3
+# The household will need to be means tested
+pathway2 = epc_data[~epc_data["uprn"].isin(pathway1["uprn"].values)]
--- a/etl/customers/warwick/remote_assessments.py
+++ b/etl/customers/warwick/remote_assessments.py
@ -0,0 +1,123 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 115
+USER_ID = 8
+
+
+def app():
+    """
+    Used to set up the remote assessments for Warwick.
+
+    Writes the asset list, the non-invasive recommendations and the valuation data as CSVs to S3 under
+    "<USER_ID>/<PORTFOLIO_ID>/", then prints the request body that references those files.
+    """
+    bucket = "retrofit-plan-inputs-dev"
+    key_prefix = f"{USER_ID}/{PORTFOLIO_ID}"
+
+    # One entry per property: (uprn, address, postcode, value, ((recommendation type, sap points), ...))
+    properties = (
+        (10033604792, "Flat 2, 3 Green Street", "W1K 6RN", 3_692_000,
+         (("internal_wall_insulation", 16),)),
+        (10033604794, "Flat 4, 3 Green Street", "W1K 6RN", 3_789_000,
+         (("internal_wall_insulation", 14),)),
+        (10033615515, "Apartment 4, 52 Green Street", "W1K 6RS", 3_499_000,
+         (("room_roof_insulation", 12), ("internal_wall_insulation", 2))),
+    )
+
+    # Store the asset list in s3
+    asset_list = pd.DataFrame(
+        [
+            {"uprn": uprn, "address": address, "postcode": postcode}
+            for uprn, address, postcode, _value, _measures in properties
+        ]
+    )
+    filename = f"{key_prefix}/asset_list.csv"
+    save_csv_to_s3(dataframe=asset_list, bucket_name=bucket, file_name=filename)
+
+    # Store non-invasive recommendations in S3 (every recommendation is flagged as coming from a survey)
+    non_invasive_recommendations = [
+        {
+            "uprn": uprn,
+            "recommendations": [
+                {"type": measure, "sap_points": sap_points, "survey": True}
+                for measure, sap_points in measures
+            ],
+        }
+        for uprn, _address, _postcode, _value, measures in properties
+    ]
+    non_invasive_recommendations_filename = f"{key_prefix}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name=bucket,
+        file_name=non_invasive_recommendations_filename,
+    )
+
+    # Store valuation data to s3
+    valuation_data = [
+        {"uprn": uprn, "value": value}
+        for uprn, _address, _postcode, value, _measures in properties
+    ]
+    valuation_filename = f"{key_prefix}/valuation.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(valuation_data),
+        bucket_name=bucket,
+        file_name=valuation_filename,
+    )
+
+    # Request body pointing at the files written above
+    body = dict(
+        portfolio_id=str(PORTFOLIO_ID),
+        housing_type="Private",
+        goal="Increasing EPC",
+        goal_value="C",
+        trigger_file_path=filename,
+        already_installed_file_path="",
+        patches_file_path="",
+        non_invasive_recommendations_file_path=non_invasive_recommendations_filename,
+        valuation_file_path=valuation_filename,
+        scenario_name="Full package remote assessment",
+        multi_plan=True,
+        budget=None,
+    )
+    print(body)
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@ -1,7 +1,7 @@
 import os
 import re
 import openpyxl
-import Levenshtein
+from fuzzywuzzy import fuzz
 from pathlib import Path
 import msgpack
 from datetime import datetime
@ -2771,7 +2771,8 @@ class DataLoader:
        match_to = [x.replace(" ", "") for x in match_to]

        # Perform matching between full key and match_to
-        distances = [Levenshtein.distance(matching_string, s) for s in match_to]
+        distances = [100 - fuzz.ratio(matching_string, s) for s in match_to]
+        
        best_match_index = distances.index(min(distances))
         # We might want to consider a threshold for the distance, however for the moment
         # we don't apply one
@ -2897,6 +2898,17 @@ class DataLoader:
        # Merge onto the survey list
        survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")

+        # TEMP FOR NEWER WORK
+        # matching_lookup = matching_lookup.merge(
+        #     asset_list[["asset_list_row_id", "UPRN"]], how="left", on="asset_list_row_id"
+        # ).merge(
+        #     survey_list[["survey_list_row_id", "NO.", "Street / Block Name", "Post Code"]],
+        #     how="left", on="survey_list_row_id"
+        # )
+        # matching_lookup.to_csv(
+        #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/surveys_to_assets.csv"
+        # )
+
        return survey_list

    @staticmethod
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
        common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]

        self.df = self.df.loc[
-            :,
-            no_suffix_cols
-            + only_ending_cols
-            + [col for cols in common_cols for col in cols],
-        ]
+                  :,
+                  no_suffix_cols
+                  + only_ending_cols
+                  + [col for cols in common_cols for col in cols],
+                  ]

    def _remove_abnormal_change_in_floor_area(self):
        """
@ -511,7 +511,7 @@ class TrainingDataset(BaseDataset):
                    expanded_df["is_sandstone_or_limestone"]
                    == expanded_df["is_sandstone_or_limestone_ending"]
                )
-            ]
+                ]
        elif component == "floor":
            expanded_df = expanded_df[
                (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@ -528,7 +528,7 @@ class TrainingDataset(BaseDataset):
                    expanded_df["is_to_external_air"]
                    == expanded_df["is_to_external_air_ending"]
                )
-            ]
+                ]
        elif component == "roof":
            expanded_df = expanded_df[
                (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@ -541,7 +541,7 @@ class TrainingDataset(BaseDataset):
                    expanded_df["has_dwelling_above"]
                    == expanded_df["has_dwelling_above_ending"]
                )
-            ]
+                ]

        return expanded_df

--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@ -139,28 +139,22 @@ class EPCRecord:

        self._clean_records_using_epc_records()
        self._clean_with_data_processor()
-
        self._expand_prepared_epc_to_attributes()
-
        self._identify_delta_between_prepared_and_original_records()

        # Process to create uvalues for the single epc record
-
-        # selff.df = self.epc_record_as_dataframe('prepared_epc')
-
+        # self.df = self.epc_record_as_dataframe('prepared_epc')
        # self._feature_generation()
        # self._drop_features()

        return

-        self._expand_description_to_features()
-        self._expand_description_to_uvalues()
-
+        # self._expand_description_to_features()
+        # self._expand_description_to_uvalues()
+        #
        # self._generate_uvalues()
        # self._validate_expanded_description()
        # self._validate_u_values()
-        # etc
-        pass

    def _drop_features(self):
        """
@ -359,6 +353,8 @@ class EPCRecord:
        self._clean_property_dimensions()
        self._clean_number_lighting_outlets()
        self._clean_floor_level()
+        self._clean_floor_height()
+        self._clean_constituency()

        # self._clean_potential_energy_efficiency()
        # self._clean_environment_impact_potential()
@ -387,6 +383,31 @@ class EPCRecord:

        return df

+    def _clean_floor_height(self):
+        """
+        Remaps anomalous floor heights to the average floor height for the property type and built form.
+
+        The reference rows are those in self.cleaning_data whose property_type and built_form match the
+        prepared EPC. The floor height is replaced by the reference average when it is more than 10
+        standard deviations above that average, or when it is at or below 1.665.
+
+        If there are no usable reference rows (the average is NaN) the record is left untouched, rather
+        than having a low floor height overwritten with NaN.
+        """
+        matches_property = (
+            (self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) &
+            (self.cleaning_data["built_form"] == self.prepared_epc["built-form"])
+        )
+        floor_heights = self.cleaning_data.loc[matches_property, "floor_height"]
+
+        average = floor_heights.mean()
+        if pd.isnull(average):
+            # No reference data for this property type / built form, so there is nothing to remap to
+            return
+
+        # sd is NaN when there is a single reference row; the upper-bound comparison is then False
+        sd = floor_heights.std()
+        floor_height = self.prepared_epc["floor-height"]
+
+        # NOTE(review): 1.665 is presumably a minimum plausible height in metres - confirm
+        too_high = floor_height > average + 10 * sd
+        too_low = floor_height <= 1.665
+        if too_high or too_low:
+            self.prepared_epc["floor-height"] = average
+
+    def _clean_constituency(self):
+        """
+        Fills in a missing constituency using the local authority.
+
+        Only the single edge case seen so far is handled: a record with local authority "E06000044"
+        is given constituency "E14000883". A missing constituency for any other local authority
+        raises NotImplementedError.
+        """
+        constituency = self.prepared_epc["constituency"]
+        if not (pd.isnull(constituency) or (constituency == "")):
+            # Constituency is already populated - nothing to clean
+            return
+
+        if self.prepared_epc["local-authority"] != "E06000044":
+            raise NotImplementedError(
+                "This function is only implemented for Portsmouth, in the single edgecase seen"
+            )
+
+        self.prepared_epc["constituency"] = "E14000883"
+
    def _clean_floor_level(self):
        """
        This method will clean the floor level, if empty or invalid
--- a/etl/find_my_epc/AssetListEpcData.py
+++ b/etl/find_my_epc/AssetListEpcData.py
@ -0,0 +1,133 @@
+import time
+import pandas as pd
+from tqdm import tqdm
+from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
+from backend.SearchEpc import SearchEpc
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class AssetListEpcData:
+
+    def __init__(self, asset_list: pd.DataFrame, epc_auth_token: str):
+        """
+        This class handles pulling data associated with an asset list and performs common functions
+        like getting EPC api data, retrieving data from the find my epc website and extracting
+        non-intrusive recommendations
+        :param asset_list: The asset list to retrieve data for; passed through check_asset_list
+        :param epc_auth_token: Token handed to SearchEpc as its auth_token when retrieving EPC data
+        """
+
+        # Results are only populated once get_data / get_non_invasive_recommendations / get_patch run
+        self.patches = None
+        self.non_invasive_recommendations = None
+        self.extracted_data = None
+
+        self.epc_auth_token = epc_auth_token
+
+        # Check the asset list contains the correct columns
+        self.asset_list = self.check_asset_list(asset_list)
+
+    @staticmethod
+    def check_asset_list(asset_list):
+        """
+        Validation hook for the asset list. No checks are implemented yet, so the asset list is
+        handed back unchanged.
+        :param asset_list: The asset list to validate
+        :return: The same asset list object that was passed in
+        """
+        # TODO: Update this with pydantic
+        validated_asset_list = asset_list
+        return validated_asset_list
+
+    def get_non_invasive_recommendations(self):
+        """
+        Extracts non-invasive recommendations in a format that can be used by the engine and stores
+        them on self.non_invasive_recommendations. Each entry holds the uprn, address, postcode and
+        recommendations of one extracted record.
+        :raises ValueError: If get_data has not been run yet
+        """
+        if self.extracted_data is None:
+            raise ValueError("Please run get_data first")
+
+        collected = []
+        for record in self.extracted_data:
+            collected.append(
+                {
+                    # uprn is optional; the other fields must be present on every record
+                    "uprn": record.get("uprn"),
+                    "address": record["address"],
+                    "postcode": record["postcode"],
+                    "recommendations": record["recommendations"],
+                }
+            )
+
+        self.non_invasive_recommendations = collected
+
+    def get_patch(self):
+        """
+        Collects the patches from the extracted data and stores them on self.patches. Each entry is
+        the record's patch with its uprn added; records without a patch are left out.
+        :raises ValueError: If get_data has not been run yet
+        """
+        if self.extracted_data is None:
+            raise ValueError("extracted data is missing - run get_data first")
+
+        collected = []
+        for record in self.extracted_data:
+            patch = record.get("patch")
+            if not patch:
+                continue
+            collected.append({"uprn": record.get("uprn"), **patch})
+
+        self.patches = collected
+
+    def get_data(self):
+        """
+        Retrieves the EPC and find my epc data for every home in the asset list and stores the
+        resulting records on self.extracted_data.
+
+        Homes for which no EPC is found are skipped. When the asset list holds a "patch" value for a
+        home, the asset list address is used for the find my epc lookup and a "patch" entry (ratings
+        plus the scraped EPC fields) is added to that home's record.
+
+        The find my epc lookup is first attempted with both EPC address lines and, if that fails for
+        any reason, retried with the first address line only. A failure of the retry propagates.
+        """
+
+        logger.info("Retrieving data for given asset list")
+
+        # Pull the additional data
+        extracted_data = []
+        for _, home in tqdm(self.asset_list.iterrows(), total=len(self.asset_list)):
+            add1 = home["address"]
+            pc = home["postcode"]
+            # Retrieve the EPC data
+            epc_searcher = SearchEpc(
+                address1=add1,
+                postcode=pc,
+                uprn=home.get("uprn"),
+                auth_token=self.epc_auth_token,
+                os_api_key="",
+            )
+            epc_searcher.ordnance_survey_client.property_type = home.get("property_type")
+            epc_searcher.ordnance_survey_client.built_form = home.get("built_form")
+            epc_searcher.find_property(skip_os=True)
+
+            if epc_searcher.newest_epc is None:
+                continue
+
+            has_patch = not pd.isnull(home.get("patch"))
+            if has_patch:
+                epc_searcher.newest_epc["address1"] = add1
+
+            epc_address1 = epc_searcher.newest_epc["address1"]
+            epc_postcode = epc_searcher.newest_epc["postcode"]
+            try:
+                epc_address2 = epc_searcher.newest_epc["address2"]
+            except KeyError:
+                epc_address2 = None
+
+            # Only append the second address line when it is real text: concatenating a missing or
+            # null value would raise and be misreported as a retrieval error below
+            full_address = epc_address1
+            if isinstance(epc_address2, str) and epc_address2.strip():
+                full_address = epc_address1 + ", " + epc_address2
+
+            # Attempt both methods:
+            try:
+                find_epc_searcher = RetrieveFindMyEpc(address=full_address, postcode=epc_postcode)
+                find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+            except Exception as e:
+                logger.error(f"Error retrieving find my epc data: {e}")
+                find_epc_searcher = RetrieveFindMyEpc(address=epc_address1, postcode=epc_postcode)
+                find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+            # Pause between homes so we do not hammer the find my epc website
+            time.sleep(0.5)
+            # We need uprn
+
+            to_append = {
+                "uprn": home.get("uprn"),
+                "address": home["address"],
+                "postcode": home["postcode"],
+                **find_epc_data,
+            }
+            if has_patch:
+                to_append["patch"] = {
+                    "current-energy-rating": find_epc_data["current_epc_rating"],
+                    "current-energy-efficiency": find_epc_data["current_epc_efficiency"],
+                    "potential-energy-rating": find_epc_data["potential_epc_rating"],
+                    "potential-energy-efficiency": find_epc_data["potential_epc_efficiency"],
+                    **find_epc_data["epc_data"]
+                }
+
+            extracted_data.append(to_append)
+
+        self.extracted_data = extracted_data
+        logger.info("Data Extrction complete")
--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@ -0,0 +1,480 @@
+import re
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from datetime import datetime
+
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class RetrieveFindMyEpc:
+    SEARCH_POSTCODE_URL = (
+        "https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode={postcode_input}"
+    )
+    BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
+
+    HEADERS = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                      'Chrome/111.0.0.0 Safari/537.36'
+    }
+
+    def __init__(self, address: str, postcode: str):
+        """
+        Sets up a retriever for the latest EPC data on the find my epc website
+        :param address: The address of the property
+        :param postcode: The postcode of the property
+        """
+        self.postcode = postcode
+        self.address = address
+
+        # Comparison form of the address: commas and spaces removed, lower-cased
+        without_commas = address.replace(",", "")
+        without_spaces = without_commas.replace(" ", "")
+        self.address_cleaned = without_spaces.lower()
+
+        # Wall descriptions of the property; filled in by retrieve_newest_find_my_epc_data
+        self.walls = []
+
+    @staticmethod
+    def extract_low_carbon_sources(soup):
+        """
+        Extracts the entries listed under the "Low and zero carbon energy sources" heading.
+        :param soup: Parsed certificate page
+        :return: Dictionary of {source text: True} for every list item; empty when the heading or
+            the list following it is missing
+        """
+        # Find the section header
+        section_header = soup.find("h3", string="Low and zero carbon energy sources")
+        if not section_header:
+            return {}
+
+        # Locate the list following the header; a header without a list has no sources to report
+        energy_list = section_header.find_next("ul")
+        if energy_list is None:
+            return {}
+
+        # Extract the list items
+        return {item.get_text(strip=True): True for item in energy_list.find_all("li")}
+
+    @staticmethod
+    def get_text(elem):
+        """
+        Returns the whitespace-stripped text of an element, or None when no element was found
+        :param elem: Element exposing get_text, or a falsy value such as None
+        """
+        if not elem:
+            return None
+        return elem.get_text(strip=True)
+
+    def extract_epc_data(self, soup):
+        """
+        Extracts EPC fields from a parsed certificate page.
+
+        Descriptions and ratings are read from the feature table rows whose header contains the
+        feature name; the first matching row is used. A feature that is not present, or a figure
+        that cannot be found on the page, is returned as None.
+
+        :param soup: Parsed certificate page
+        :return: Dictionary holding the total floor area, the description / energy efficiency pairs
+            of the property features, the current primary energy use and the current and potential
+            CO2 emissions
+        """
+
+        results = {}
+
+        # 1. Total floor area
+        results['total-floor-area'] = int(self.get_text(
+            soup.find("dt", string="Total floor area").find_next_sibling("dd")
+        ).split(" ")[0])
+
+        # Table with features. Each row is paired with its header text once, so the header is not
+        # looked up again for every feature
+        feature_rows = []
+        for row in soup.select("table.govuk-table tbody tr"):
+            header = row.find("th")
+            if header:
+                feature_rows.append((header.text, row))
+
+        rating_map = {
+            "Very poor": "Very Poor",
+            "Very good": "Very Good"
+        }
+
+        def get_feature_row_text(feature_name, index=0):
+            matches = [row for header_text, row in feature_rows if feature_name in header_text]
+            if len(matches) > index:
+                cells = matches[index].find_all("td")
+                # Tolerate rows with fewer cells than expected instead of failing the extraction
+                description = self.get_text(cells[0]) if len(cells) > 0 else None
+                rating = self.get_text(cells[1]) if len(cells) > 1 else None
+                return description, rating_map.get(rating, rating)
+            return None, None
+
+        def first_number(pattern, text, cast):
+            # Returns the first captured group converted with cast, or None when there is no text/match
+            match = re.search(pattern, text) if text is not None else None
+            return cast(match.group(1)) if match else None
+
+        # 2-3. First wall description and rating
+        results['walls-description'], results['walls-energy-eff'] = get_feature_row_text("Wall", 0)
+
+        # 4-5. First roof description and rating
+        results['roof-description'], results['roof-energy-eff'] = get_feature_row_text("Roof", 0)
+
+        # 6-7. Windows description and rating
+        results['windows-description'], results['windows-energy-eff'] = get_feature_row_text("Window")
+
+        # 8-9. Main heating description and rating
+        results['mainheat-description'], results['mainheat-energy-eff'] = get_feature_row_text("Main heating")
+
+        # 10-11. Main heating control description and rating
+        results['mainheatcont-description'], results['mainheatc-energy-eff'] = get_feature_row_text(
+            "Main heating control"
+        )
+
+        # 12-13. Hot water description and rating. The rating key follows the same "-energy-eff"
+        # naming as the other features so that it lines up with the EPC record field
+        results['hotwater-description'], results['hot-water-energy-eff'] = get_feature_row_text("Hot water")
+
+        # 14-15. Lighting description and rating
+        results['lighting-description'], results['lighting-energy-eff'] = get_feature_row_text("Lighting")
+
+        # 16. Floor description
+        results['floor-description'], _ = get_feature_row_text("Floor")
+
+        # 17. Secondary heating description
+        results['secondheat-description'], _ = get_feature_row_text("Secondary heating")
+
+        # 18. Primary energy use
+        p_energy = soup.find(string=lambda t: "primary energy use for this property per year" in t.lower())
+        # We should always have this, but fall back to None rather than failing if we do not
+        results['energy-consumption-current'] = first_number(r"(\d+)\s+kilowatt", p_energy, int)
+
+        # 19. Current CO2 emissions
+        co2_now = soup.find("dd", id="eir-property-produces")
+        # We should always have this, but fall back to None rather than failing if we do not
+        results['co2-emissions-current'] = first_number(
+            r"([\d.]+)", co2_now.text if co2_now is not None else None, float
+        )
+        # Need co2-emiss-curr-per-floor-area
+
+        # 20. Potential CO2 emissions
+        co2_pot = soup.find("dd", id="eir-potential-production")
+        results['co2-emissions-potential'] = first_number(
+            r"([\d.]+)", co2_pot.text if co2_pot is not None else None, float
+        )
+
+        return results
+
+    def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
+        """
+        For a post code and address, we pull out all the required data from the find my epc website.
+
+        The postcode search results are narrowed to the rows whose address starts with the address
+        given at construction, the certificate with the latest expiry date is chosen, and its page
+        is scraped. The wall descriptions found are also stored on self.walls.
+
+        :param sap_2012_date: Optional cut-off date, passed through to format_recommendations
+        :return: Dictionary holding the certificate identifier (last path segment of the certificate
+            url), the current and potential ratings and scores, the bill texts, the formatted
+            recommendations, the extracted EPC data, the assessment details and any low and zero
+            carbon energy sources
+        :raises ValueError: If no certificate matches the address, or an expected assessment detail
+            is missing from the certificate page
+        """
+        # Without a timeout a stalled connection would block the caller indefinitely
+        request_timeout = 30
+
+        postcode_input = self.postcode.replace(" ", "+")
+        postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
+        postcode_response = requests.get(postcode_search, headers=self.HEADERS, timeout=request_timeout)
+
+        postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
+        rows = postcode_res.find_all('tr', class_='govuk-table__row')
+
+        extracted_table = []
+        for row in rows:
+            # Extract the address and URL
+            address_tag = row.find('a', class_='govuk-link')
+            if address_tag is None:
+                continue
+            extracted_address = address_tag.text.strip()
+            extracted_address_url = address_tag['href']
+
+            # Only rows whose address starts with the address we are looking for are a match
+            extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower()
+            if not extracted_address_cleaned.startswith(self.address_cleaned):
+                continue
+
+            # Extract the expiry date. A matching row without a date is kept, but ranked as the
+            # oldest so that any dated certificate is preferred over it
+            expiry_date_tag = row.find('td', class_='govuk-table__cell date')
+            expiry_date = datetime.min
+            if expiry_date_tag is not None:
+                expiry_date = datetime.strptime(
+                    expiry_date_tag.parent.find('span').text.strip(), '%d %B %Y'
+                )
+
+            extracted_table.append(
+                {
+                    "extracted_address": extracted_address,
+                    "extracted_address_url": extracted_address_url,
+                    "expiry_date": expiry_date,
+                }
+            )
+
+        if not extracted_table:
+            raise ValueError("No EPC found")
+
+        # We take the one with the most recent expiry date (the first such row on a tie)
+        newest_row = max(extracted_table, key=lambda x: x['expiry_date'])
+
+        chosen_epc = self.BASE_ENERGY_URL + newest_row['extracted_address_url']
+        epc_certificate = chosen_epc.split('/')[-1]
+
+        address_response = requests.get(chosen_epc, headers=self.HEADERS, timeout=request_timeout)
+        address_res = BeautifulSoup(address_response.text, features="html.parser")
+
+        # Key data we want to retrieve:
+        # 1) Rating
+        # 2) Bills estimates
+        # 3) Recommendations and SAP points
+        # 4) Low and zero carbon energy sources
+        # 5) The wall types of the property - used for determining if we have an extension wall insulation
+        #    recommendation
+
+        # The rating description holds one sentence for the current rating and one for the potential
+        # rating. NOTE(review): the word offsets used below (-1 for the score, -6 for the band letter)
+        # presumably rely on the sentence ending "... <band> with a score of <score>" - confirm
+        ratings = address_res.find('desc', {'id': 'svg-desc'}).text
+        current_rating = ratings.split(".")[0]
+        potential_rating = ratings.split(".")[1]
+        current_sap = int(current_rating.split(' ')[-1])
+
+        # Retrieve the energy consumption
+        bills = address_res.find('div', {'id': 'bills-affected'})
+        bills_list = bills.find_all('li') if bills is not None else []
+        # If entries are missing, it's usually because the EPC was very old. Early EPCs did not have
+        # this information
+        heating_text = bills_list[0].text if len(bills_list) > 0 else None
+        hot_water_text = bills_list[1].text if len(bills_list) > 1 else None
+
+        # Retrieve the recommendations and SAP points
+        recommendations = []
+        recommendations_div = address_res.find('div', class_='epb-recommended-improvements')
+        if recommendations_div:
+            # Find all h3 headers for each step and extract their related information
+            step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
+            previous_sap_score = current_sap
+            previous_epc = current_rating.split(' ')[-6]
+            for step_num, step_header in enumerate(step_headers, start=1):
+                # Extract the step title (the measure)
+                measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")
+
+                # Find the div containing the potential rating within the same section
+                potential_rating_div = step_header.find_next(
+                    'div', class_='epb-recommended-improvements__potential-rating'
+                )
+
+                # Check if the potential rating div is found
+                if potential_rating_div:
+                    # Extract the rating text within the SVG text element
+                    extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')
+                    if extracted_rating_text is not None:
+                        rating_text = extracted_rating_text.text.strip()
+                    else:
+                        # No rating shown for this step, so we carry the previous rating forward
+                        rating_text = " ".join([str(previous_sap_score), previous_epc])
+                    # Parse the rating text to separate the numeric rating and EPC letter
+                    new_rating = int(rating_text.split()[0])
+                    new_epc = rating_text.split()[1]
+
+                    # Append the information as a dictionary to the recommendations list
+                    recommendations.append({
+                        "step": step_num,
+                        "measure": measure_title,
+                        "new_rating": new_rating,
+                        "new_epc": new_epc,
+                        "sap_points": new_rating - previous_sap_score
+                    })
+                    previous_sap_score = new_rating
+                    previous_epc = new_epc
+
+        # Search for the assessment information
+        assessment_information = address_res.find('div', {'id': 'information'})
+        # Parse this information
+        rows = assessment_information.find_all('div', class_='govuk-summary-list__row')
+        # Create a dictionary to hold the parsed information
+        assessment_data = {}
+        for row in rows:
+            key = row.find('dt').text.strip()
+            if key == "Type of assessment":
+                # We dont reliably extract this
+                continue
+            value_tag = row.find('dd')
+
+            # Check if value contains a link (email)
+            if value_tag.find('a'):
+                value = value_tag.find('a').text.strip()
+            elif value_tag.find('summary'):
+                value = value_tag.find('span').text.strip()
+            else:
+                value = value_tag.text.strip()
+
+            # These are keys that we have for both the surveyor and the accreditation scheme. The
+            # first occurrence is recorded against the assessor and the next against the scheme
+            if key in ["Telephone", "Email"]:
+                if "Assessor's " + key not in assessment_data:
+                    assessment_data["Assessor's " + key] = value
+                else:
+                    assessment_data["Accreditation Scheme's " + key] = value
+                continue
+
+            assessment_data[key] = value
+
+        expected_keys = [
+            'Assessor’s name',
+            "Assessor's Telephone",
+            "Assessor's Email",
+            'Assessor’s ID',
+            'Accreditation scheme',
+            'Assessor’s declaration',
+            "Accreditation Scheme's Telephone",
+            "Accreditation Scheme's Email",
+            'Date of assessment',
+            'Date of certificate'
+        ]
+        # Check we have all the expected keys
+        for key in expected_keys:
+            if key not in assessment_data:
+                raise ValueError(f"Missing key: {key}")
+
+        # The wall types of the property
+        property_features_table = address_res.find("tbody", class_="govuk-table__body")
+        property_features_table = property_features_table.find_all("tr")
+
+        # Extract wall types
+        self.walls = []
+        for row in property_features_table:
+            header = row.find("th")
+            # Rows without a header cell cannot be wall rows
+            if header is not None and header.text.strip() == "Wall":
+                self.walls.append(row.find_all("td")[0].text.strip())
+
+        # Finally, we format the recommendations
+        recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)
+
+        # 4) Low and zero carbon energy sources
+        low_carbon_energy_sources = self.extract_low_carbon_sources(address_res)
+
+        # 5) Pull out the EPC data
+        epc_data = self.extract_epc_data(address_res)
+
+        resulting_data = {
+            'epc_certificate': epc_certificate,
+            'current_epc_rating': current_rating.split(' ')[-6],
+            'current_epc_efficiency': current_sap,
+            'potential_epc_rating': potential_rating.split(' ')[-6],
+            "potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
+            "heating_text": heating_text,
+            "hot_water_text": hot_water_text,
+            "recommendations": recommendations,
+            "epc_data": epc_data,
+            **assessment_data,
+            **low_carbon_energy_sources,
+        }
+
+        return resulting_data
+
+    def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None):
+        """
+        This function converts the recommendations to a format that we can use in the engine as a non-intrusive survey
+        :param recommendations: The recommendations from the EPC; each needs a "measure" and "sap_points"
+        :param assessment_data: The assessment data from the EPC; "Date of certificate" is read when
+            sap_2012_date is given
+        :param sap_2012_date: The date of the SAP 2012 update. Recommendations from certificates dated
+            before it are flagged with survey=False
+        :return: List of {"type", "sap_points", "survey"} dictionaries, one per engine measure that an
+            EPC recommendation maps to (solar_pv entries also carry "suitable": True)
+        :raises KeyError: If a recommendation's measure has no entry in the measure map
+        """
+
+        measure_map = {
+            "Internal or external wall insulation": ["internal_wall_insulation", "external_wall_insulation"],
+            "Hot water cylinder insulation": ["hot_water_tank_insulation"],
+            "Hot water cylinder thermostat": ["cylinder_thermostat"],
+            "High performance external doors": ["insulated_doors"],
+            "Floor insulation (solid floor)": ["solid_floor_insulation"],
+            "Floor insulation (suspended floor)": ["suspended_floor_insulation"],
+            "Double glazed windows": ["double_glazing"],
+            "Cavity wall insulation": ["cavity_wall_insulation"],
+            "Replace boiler with new condensing boiler": ["boiler_upgrade"],
+            "Floor insulation": ["floor_insulation"],  # Recommendation typically associated to older EPCs
+            "Heating controls (programmer, room thermostat and TRVs)": [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ],
+            "Low energy lighting": ["low_energy_lighting"],
+            "Increase loft insulation to 270 mm": ["loft_insulation"],
+            "Heating controls (thermostatic radiator valves)": [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ],
+            "Solar water heating": ["solar_water_heating"],
+            "Solar photovoltaic panels, 2.5 kWp": ["solar_pv"],
+            "Heating controls (room thermostat and TRVs)": [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ],
+            "Change heating to gas condensing boiler": ["boiler_upgrade"],
+            "Fan assisted storage heaters and dual immersion cylinder": ["high_heat_retention_storage_heater"],
+            "Flat roof or sloping ceiling insulation": ["flat_roof_insulation"],
+            "Heating controls (room thermostat)": [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ],
+            "Band A condensing boiler": ["boiler_upgrade"],
+            "Double glazing": ["double_glazing"],
+            "Flue gas heat recovery device in conjunction with boiler": ["flue_gas_heat_recovery"],
+            "Wind turbine": ["wind_turbine"],
+            "Loft insulation": ["loft_insulation"],
+            "Solar photovoltaic (PV) panels": ["solar_pv"],
+            "Party wall insulation": ["party_wall_insulation"],
+            'Draught proofing': ["draught_proofing"],
+            "Roof insulation recommendation": [],
+            "Cavity wall insulation recommendation": [],
+            "Windows draught proofing": [],
+            "Low energy lighting for all fixed outlets": ["low_energy_lighting"],
+            "Cylinder thermostat recommendation": [],
+            "Heating controls recommendation": [],
+            "Replace boiler with Band A condensing boiler": ["boiler_upgrade"],
+            "Band A condensing gas boiler": ["boiler_upgrade"],
+            "Solar panel recommendation": [],
+            "Double glazing recommendation": [],
+            "Solid wall insulation recommendation": [],
+            "Fuel change recommendation": [],
+            "PV Cells recommendation": [],
+            "Replacement glazing units": ["double_glazing"],
+            "Heating controls (time and temperature zone control)": ["time_temperature_zone_control"],
+            "High heat retention storage heaters": ["high_heat_retention_storage_heater"],
+            "Gas condensing boiler": ["boiler_upgrade"],
+            "Change room heaters to condensing boiler": ["boiler_upgrade"],
+            "Cylinder thermostat": ["cylinder_thermostat"],
+            "Heat recovery system for mixer showers": ["heat_recovery_shower"],
+            "Room-in-roof insulation": ["room_in_roof_insulation"],
+            "Fan assisted storage heaters": [],
+            "Fan-assisted storage heaters": [],
+            "Step 1:": [],
+            "Step 2:": [],
+            'Step 3:': [],
+            "Biomass stove with boiler": [],
+            "Replace boiler with biomass boiler": [],
+            "Heating controls (room thermostat and thermostatic radiator valves)": [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ],
+            "Heating controls (programmer, and thermostatic radiator valves)": [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ],
+            "Heating controls (programmer and TRVs)": [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ],
+            "Heating controls (programmer and room thermostat)": [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ],
+            "Replacement warm air unit": [],
+            "Secondary glazing": ["secondary_glazing"],
+            "Condensing heating unit": ["boiler_upgrade"],
+            '???': [],
+            'Solar photovoltaic panels, 2.5kWp': ["solar_pv"],
+            'Heating controls (programmer, room thermostat and thermostatic radiator valves)': [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ],
+            'Translation missing: en.improvement_code.41.title': [],
+            "Condensing boiler (separate from the range cooker)": ["boiler_upgrade"],
+            "Heating controls (programmer and thermostatic radiator valves)": [
+                "roomstat_programmer_trvs", "time_temperature_zone_control"
+            ]
+        }
+
+        survey = True
+        if sap_2012_date is not None:
+            certificate_date = datetime.strptime(assessment_data["Date of certificate"], "%d %B %Y")
+            if certificate_date < pd.to_datetime(sap_2012_date):
+                survey = False
+
+        # A cavity wall recommendation on a property whose first wall is solid brick is treated as
+        # relating to an extension. With no wall descriptions available we keep the plain measure
+        first_wall_is_solid_brick = bool(self.walls) and "solid brick" in self.walls[0].lower()
+
+        formatted_recommendations = []
+        for rec in recommendations:
+            measure_title = rec["measure"]
+            if measure_title not in measure_map:
+                # Still a KeyError, but one that says what to do about it
+                raise KeyError(
+                    f"Unrecognised EPC recommendation {measure_title!r} - it needs adding to measure_map"
+                )
+            for measure in measure_map[measure_title]:
+                if measure == "cavity_wall_insulation" and first_wall_is_solid_brick:
+                    measure = "extension_cavity_wall_insulation"
+                to_append = {
+                    "type": measure,
+                    "sap_points": rec["sap_points"],
+                    "survey": survey,
+                }
+                if measure == "solar_pv":
+                    to_append["suitable"] = True
+                formatted_recommendations.append(to_append)
+
+        return formatted_recommendations
+
+    @classmethod
+    def get_from_epc(cls, epc):
+        """
+        Retrieves the non-invasive recommendations for an EPC record from the find my epc website.
+        The lookup is attempted with the record's "address" and, if that fails for any reason,
+        retried with its "address1".
+        :param epc: EPC record providing "address", "address1", "postcode" and "uprn"
+        :return: Dictionary with the uprn, address, postcode and recommendations
+        """
+        # Attempt both methods:
+        try:
+            find_epc_data = cls(
+                address=epc["address"], postcode=epc["postcode"]
+            ).retrieve_newest_find_my_epc_data()
+        except Exception as e:
+            logger.error(f"Error retrieving find my epc data: {e}")
+            # We attempt with the backup address
+            backup_searcher = cls(address=epc["address1"], postcode=epc["postcode"])
+            find_epc_data = backup_searcher.retrieve_newest_find_my_epc_data()
+
+        return {
+            "uprn": epc["uprn"],
+            "address": epc["address"],
+            "postcode": epc["postcode"],
+            "recommendations": find_epc_data["recommendations"],
+        }
--- a/etl/find_my_epc/requirements.txt
+++ b/etl/find_my_epc/requirements.txt
@ -0,0 +1,2 @@
+pandas
+beautifulsoup4
--- a/etl/funding/app.py
+++ b/etl/funding/app.py
@ -0,0 +1,35 @@
+"""
+This script prepares the data required for us to perform funding calculations. The starting data should be stored
+on the machine this is being run on, and this will prepare the information and upload it to S3.
+"""
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+STAGE = "dev"  # substituted into the {stage} placeholder of DATA_BUCKET
+DATA_BUCKET = "retrofit-data-{stage}"
+PROJECTS_SCORES_MATRIX_LOCATION = "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
+WHLG_ELIGIBLE_POSTCODES = "/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes.xlsx"
+
+
+def app():
+    """Upload the ECO4 project scores matrix and the lower-cased WHLG eligible postcodes to S3."""
+    bucket_name = DATA_BUCKET.format(stage=STAGE)
+    # Read in the project scores matrix and store it in AWS S3 unchanged
+    project_scores_matrix = pd.read_csv(PROJECTS_SCORES_MATRIX_LOCATION)
+    save_csv_to_s3(
+        dataframe=project_scores_matrix,
+        bucket_name=bucket_name,
+        file_name="funding/ECO4 Full Project Scores Matrix.csv"
+    )
+
+    # Read in the Warm Homes Local Grant eligible postcodes data
+    whlg_eligible_postcodes = pd.read_excel(WHLG_ELIGIBLE_POSTCODES, sheet_name="Eligible postcodes", header=1)
+    # Keep only the postcode column; the explicit copy avoids pandas' SettingWithCopyWarning on the next line
+    whlg_eligible_postcodes = whlg_eligible_postcodes[["Postcode"]].copy()
+    whlg_eligible_postcodes["Postcode"] = whlg_eligible_postcodes["Postcode"].str.lower()
+
+    save_csv_to_s3(
+        dataframe=whlg_eligible_postcodes,
+        bucket_name=bucket_name,
+        file_name="funding/whlg eligible postcodes.csv"
+    )
--- a/etl/lodgement/app.py
+++ b/etl/lodgement/app.py
@ -0,0 +1,326 @@
+import os
+
+import pandas as pd
+
+import utils.file_data_extraction as file_extraction_tools
+from utils.fullSapParser import FullSapParser
+from utils.OsmosisCondtionReportParser import OsmosisConditionReportParser
+
+output_template = {  # output column -> default value; handler() copies this once per property folder
+    "Property Address": None,
+    "Osm. ID": None,
+    "Postcode": None,
+    "City/County": None,
+    "District/Town": None,
+    "Funding Stream": None,
+    # "Risk Path": None,
+    "Local Authority": None,
+    "Trustmark Lodgement ID": None,
+    "Certificate Number": None,
+    "EWI UMR": None,
+    "Loft UMR": None,
+    "Windows UMR": None,
+    "Doors UMR": None,
+    "Measure Lodgement Date": None,
+    "Full Lodgement Date": None,
+    "Owner - Name": None,
+    "Owner - Phone": None,
+    "Owner - Email": None,
+    "Tenant - Name": None,
+    "Tenant - Phone": None,
+    "R. Assessor - Name": None,
+    "R. Coordinator - Name": None,
+    "Trustmark Licence Number": None,
+    "Retrofit Assessment Date": None,
+    "Company Name": None,
+    "Retrofit Designer Name": None,
+    "Property Type": None,
+    "Property Detachment": None,
+    "No. of Bedrooms": None,
+    "Property age": None,
+    "SAP Rating Pre (from IMA)": None,
+    "Pre Heat Transfer": None,
+    "Pre Total Floor Area": None,
+    "Pre Heat Demand": None,
+    "Pre Air Tightness": None,
+    "SAP Rating Post (from EPC)": None,
+    "Post Heat Transfer": None,
+    "Post Total Floor Area": None,
+    "Post Heat Demand": None,
+    "Post Air Tightness": None,
+    "Number of Eligible Measures Installed": None,
+    "Total Cost of Works": None,
+    "Annual Fuel Saving (MTP)": None,
+}
+
+
+def update_dictionary_with_check(dictionary, updates):
+    """
+    Update `dictionary` in place, validating every key first so a rejected call changes nothing.
+
+    Args:
+        dictionary (dict): The dictionary to update.
+        updates (dict): The updates to apply.
+
+    Raises:
+        KeyError: If a key in updates does not exist in the dictionary.
+    """
+    for key in updates:
+        if key not in dictionary:
+            raise KeyError(f"Key '{key}' does not exist in the dictionary.")
+    dictionary.update(updates)
+
+
+def handler():
+    """
+    Extract data from the documents in each property folder under the (currently local) source path and
+    write one row per property, based on `output_template`, to a CSV file.
+    :return: None
+    """
+
+    # The source data will eventually come from Sharepoint; for now it is read from a local folder
+    source_data_path = "/Users/khalimconn-kowlessar/Documents/hestia/Lodgment Pilot"
+    output_template_file = "Trustmark Details - Template REV.25.11.24.xlsx"
+    funding_stream = "HUG2"
+    customer_name = "Shropshire Council"
+    customer_phone = "0345 678 9000"
+    customer_email = "affordablewarmth@shropshire.gov.uk"
+
+    # TODO: In order for this to go live, we need to use Poppler, which needs to be installed
+    #       w/ brew install poppler
+    #       We also need to install Tesseract: brew install tesseract
+
+    # List the folders in the source data path
+    folders = [x for x in os.listdir(source_data_path) if os.path.isdir(os.path.join(source_data_path, x))]
+
+    extractors = {
+        "elmhurst epr": file_extraction_tools.ElmhurstEprExtractor,
+        "elmhurst summary report": file_extraction_tools.ElmhurstSummaryReportExtractor,
+        "osmosis condition report": OsmosisConditionReportParser,
+        "elmhurst evidence report": None,  # no extractor: files of this type are skipped
+        "full sap xml": FullSapParser,
+        "pulse air permeability": file_extraction_tools.PulseAirPermeabilityExtractor,
+        "elmhurst project handover": file_extraction_tools.ElmhurstProjectHandoverExtractor,
+        "core logic pas assessment report": file_extraction_tools.CoreLogicPasAssessmentReportExtractor,
+    }
+
+    extracted = []
+    for property_folder in folders:
+
+        property_folder_path = os.path.join(source_data_path, property_folder)
+        # List the sub-folders of this property's folder
+        subfolders = [
+            x for x in os.listdir(property_folder_path) if os.path.isdir(os.path.join(property_folder_path, x))
+        ]
+        coord_folder = os.path.join(property_folder_path, [f for f in subfolders if "RA Coordinator Info" in f][0])
+
+        # Get the files in the coordinator folder
+        coordinator_folder_contents = [
+            file for file in os.listdir(coord_folder) if os.path.isfile(os.path.join(coord_folder, file))
+        ]
+
+        # We detect each file's report type and run the matching extractor, keyed by report type
+        extracted_contents = {}
+        for filename in coordinator_folder_contents:
+            filepath = os.path.join(coord_folder, filename)
+            if file_extraction_tools.is_pdf(filepath):
+                report_type = file_extraction_tools.detect_pdf_report_type(pdf_path=filepath)
+                if report_type is None:
+                    raise ValueError(f"Unknown report type for {filename}")
+
+                file_extractor = extractors[report_type]
+                if file_extractor is None:
+                    continue
+
+                extracted_contents[report_type] = file_extractor(filepath).extract()
+
+            if file_extraction_tools.is_xml(filepath):
+                xml_type = file_extraction_tools.detect_xml_report_type(xml_path=filepath)
+                if xml_type is None:
+                    raise ValueError(f"Unknown report type for {filename}")
+                file_extractor = extractors.get(xml_type)
+                if file_extractor is None:
+                    continue
+
+                extracted_contents[xml_type] = file_extractor(filepath).extract()
+
+        att_folder = os.path.join(property_folder_path, [f for f in subfolders if "Air Tightness Tests" in f][0])
+        att_folder_contents = [
+            file for file in os.listdir(att_folder) if os.path.isfile(os.path.join(att_folder, file))
+        ]
+
+        for filename in att_folder_contents:
+            filepath = os.path.join(att_folder, filename)
+            if file_extraction_tools.is_pdf(filepath):
+                report_type = file_extraction_tools.detect_pdf_report_type(pdf_path=filepath)
+                if report_type is None:
+                    raise ValueError(f"Unknown report type for {filename}")
+                file_extractor = extractors[report_type]
+
+                if file_extractor is None:
+                    continue
+
+                extracted_contents[report_type] = file_extractor(filepath).extract()
+
+        lodgement_folder = os.path.join(
+            property_folder_path, [f for f in subfolders if "TrustMark Lodgement" in f][0]
+        )
+        # Within the lodgement folder, we want the required documents sub-folder
+        lodgement_subfolders = [
+            file for file in os.listdir(lodgement_folder) if os.path.isdir(os.path.join(lodgement_folder, file))
+        ]
+        required_documents_folder = os.path.join(
+            lodgement_folder, [f for f in lodgement_subfolders if "required documents" in f.lower()][0]
+        )
+        # List the contents
+        required_documents_contents = [
+            file for file in os.listdir(required_documents_folder) if
+            os.path.isfile(os.path.join(required_documents_folder, file))
+        ]
+
+        # There are only a few file types we actually want to process in here for the moment
+        for filename in required_documents_contents:
+            filepath = os.path.join(required_documents_folder, filename)
+            if file_extraction_tools.is_pdf(filepath):
+                report_type = file_extraction_tools.detect_pdf_report_type(pdf_path=filepath)
+                if report_type != "elmhurst project handover":
+                    continue
+                file_extractor = extractors[report_type]
+
+                extracted_contents[report_type] = file_extractor(filepath).extract()
+
+        output_row_data = output_template.copy()
+
+        # dict_keys([ 'City/County', 'District/Town',
+        # 'Local Authority', 'Trustmark Lodgement ID', 'Certificate Number', 'EWI UMR', 'Loft UMR', 'Windows UMR',
+        # 'Doors UMR', 'Measure Lodgement Date', 'Full Lodgement Date', 'Owner - Name', 'Owner - Phone',
+        # 'Owner - Email', 'Tenant - Name', 'Tenant - Phone',
+        # 'Trustmark Licence Number',
+        # Pre Air Tightness', 'SAP Rating Post (from EPC)', 'Post Heat
+        # Transfer', 'Post Total Floor Area', 'Post Heat Demand', 'Post Air Tightness',
+        # 'Total Cost of Works', 'Annual Fuel Saving (MTP)'])
+
+        update_dictionary_with_check(
+            output_row_data,
+            {
+                "Funding Stream": funding_stream,
+                "Property Address": property_folder.split(")")[1].strip(),
+                "Osm. ID": property_folder.split(")")[0].strip().lstrip("(").strip(),
+            }
+        )
+
+        if extracted_contents.get("elmhurst epr"):
+            total_floor_area = sum(
+                [x["Floor Area (m2)"] for x in extracted_contents["elmhurst epr"]["Building Parts"]] +
+                # Get the conservatory floor area
+                [extracted_contents["elmhurst epr"]["Conservatory"]["Conservatory Floor Area"]]
+            )
+
+            pre_heat_transfer = extracted_contents["elmhurst epr"]["Primary Energy Use Intensity (kWh/m2/yr)"]
+            pre_heat_demand = (
+                extracted_contents["elmhurst epr"]["Primary Energy Use Intensity (kWh/m2/yr)"] * total_floor_area
+            )
+
+            epr_to_insert = {
+                "Postcode": extracted_contents["elmhurst epr"]["Postcode"],
+                "City/County": extracted_contents["elmhurst epr"]["County"],
+                "District/Town": extracted_contents["elmhurst epr"]["Town"],
+                "Local Authority": None,  # not taken from the EPR
+                'SAP Rating Pre (from IMA)': extracted_contents["elmhurst epr"]["Current SAP Rating"],
+                'Pre Heat Transfer': pre_heat_transfer,
+                'Pre Total Floor Area': total_floor_area,
+                'Pre Heat Demand': pre_heat_demand,
+                "R. Assessor - Name": extracted_contents["elmhurst epr"]["Assessor Name"],
+                "Retrofit Assessment Date": extracted_contents["elmhurst epr"]["Assessment Date"],
+            }
+            update_dictionary_with_check(
+                output_row_data,
+                epr_to_insert
+            )
+
+        if extracted_contents.get("full sap xml"):
+            xml_to_insert = {
+                "Property Type": extracted_contents["full sap xml"]["Property Type"],
+                "Property Detachment": extracted_contents["full sap xml"]["Built Form"],
+                "Property age": extracted_contents["full sap xml"]["Age Band"],
+
+            }
+            update_dictionary_with_check(
+                output_row_data,
+                xml_to_insert
+            )
+
+        if extracted_contents.get("osmosis condition report"):
+            cr_to_insert = {
+                "No. of Bedrooms": extracted_contents["osmosis condition report"]["No. of Bedrooms"],
+                # "Risk Path": extracted_contents["osmosis condition report"]["Risk Assessment Pathway"],
+            }
+            update_dictionary_with_check(
+                output_row_data,
+                cr_to_insert
+            )
+
+        if extracted_contents.get("elmhurst summary report"):  # runs after the EPR block, so it overrides shared fields
+            total_floor_area = sum(
+                [x["Floor Area (m2)"] for x in extracted_contents["elmhurst summary report"]["Building Parts"]] +
+                # Get the conservatory floor area
+                [extracted_contents["elmhurst summary report"]["Conservatory"]["Conservatory Floor Area"]]
+            )
+
+            pre_heat_transfer = (
+                extracted_contents["elmhurst summary report"]["Primary Energy Use Intensity (kWh/m2/yr)"]
+            )
+            pre_heat_demand = None  # Don't have this
+
+            summary_to_insert = {
+                "Postcode": extracted_contents["elmhurst summary report"]["Postcode"],
+                "City/County": extracted_contents["elmhurst summary report"]["County"],
+                "District/Town": extracted_contents["elmhurst summary report"]["Town"],
+                'SAP Rating Pre (from IMA)': extracted_contents["elmhurst summary report"]["Current SAP Rating"],
+                'Pre Heat Transfer': pre_heat_transfer,
+                'Pre Total Floor Area': total_floor_area,
+                'Pre Heat Demand': pre_heat_demand,
+                "R. Assessor - Name": extracted_contents["elmhurst summary report"]["Assessor Name"],
+                "Retrofit Assessment Date": extracted_contents["elmhurst summary report"]["Assessment Date"],
+            }
+
+            update_dictionary_with_check(
+                output_row_data,
+                summary_to_insert
+            )
+
+        if extracted_contents.get("pulse air permeability"):
+            # We extract the AP50 number: the "Extrapolated @ 50PA" value of the "Air Permeability" row
+            results_table = extracted_contents["pulse air permeability"]["Results Table"]
+            ap50 = [x["Extrapolated @ 50PA"] for x in results_table if x["Metric"] == "Air Permeability"][0]
+            update_dictionary_with_check(
+                output_row_data,
+                {"Pre Air Tightness": ap50}
+            )
+
+        if extracted_contents.get("elmhurst project handover"):
+            handover_to_insert = {
+                "Number of Eligible Measures Installed": len(
+                    extracted_contents["elmhurst project handover"]["Measures Fitted"]
+                ),
+                "Retrofit Designer Name": extracted_contents["elmhurst project handover"]["Designer Name"],
+                "Company Name": extracted_contents["elmhurst project handover"]["Installer Name"],
+                "R. Coordinator - Name": extracted_contents["elmhurst project handover"]["Retrofit Coordinator Name"],
+            }
+            update_dictionary_with_check(output_row_data, handover_to_insert)
+
+        if extracted_contents.get("core logic pas assessment report"):  # overrides the Osmosis bedroom count
+            cr_to_insert = {
+                "No. of Bedrooms": extracted_contents["core logic pas assessment report"]["Number of bedrooms"],
+            }
+            update_dictionary_with_check(
+                output_row_data,
+                cr_to_insert
+            )
+
+        extracted.append(output_row_data)
+
+    extracted_df = pd.DataFrame(extracted)
+
+    extracted_df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Lodgment Pilot/poc-extrcted-data.csv",
+                        index=False)
--- a/etl/lodgement/requirements.txt
+++ b/etl/lodgement/requirements.txt
@ -0,0 +1,14 @@
+PyPDF2
+pandas
+tqdm
+openpyxl
+boto3
+usaddress==0.5.11
+fuzzywuzzy==0.18.0
+python-dotenv
+python-docx
+pymupdf
+pytesseract
+pdf2image
+pillow
+pdfplumber
--- a/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
+++ b/etl/route_march/oo_prs_additional_units/oo_prs_additional_units.py
@ -0,0 +1,240 @@
+import os
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from dotenv import load_dotenv
+from urllib.parse import urlencode
+from epc_api.client import EpcClient
+from utils.logger import setup_logger
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+logger = setup_logger()
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")  # None when the variable is not set
+
+CONFIG = [  # one entry per worksheet that main() processes: workbook path, tab name, postcode column
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "SETTLE GBIS x 242 ",
+        "postcode_column": "Postcode",
+    },
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "ACIS GBIS x 76",
+        "postcode_column": "Postcode",
+    },
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "SOUTHERN GBIS x 150",
+        "postcode_column": "Postcode",
+    },
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "COMMUNITY HOUSING GBIS x 199",
+        "postcode_column": "Postcode",
+    },
+    {
+        "filepath": "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/Surveyors Sites for Week Commencing "
+                    "11.11.2024.xlsx",
+        "tab": "EASTLIGHT GBIS x 42",
+        "postcode_column": "Postcode",
+    },
+]
+
+CAVITY_WALL_DESCRIPTIONS = [  # "walls-description" values that count towards "Is Cavity Property"
+    "Cavity wall, as built, no insulation (assumed)",
+    "Cavity wall, as built, partial insulation (assumed)",
+    "Cavity wall, as built, insulated (assumed)",
+    "Cavity wall, with internal insulation",
+    "Cavity wall, with external insulation",
+]
+
+ROOF_DESCRIPTIONS = [  # "roof-description" values that count towards "Solar and Loft"
+    "Pitched, no insulation",
+    "Pitched, no insulation (assumed)",
+    "Pitched, 25 mm loft insulation",
+    "Pitched, 50 mm loft insulation",
+    "Pitched, 75 mm loft insulation",
+    "Pitched, 100 mm loft insulation",
+    "Pitched, 150 mm loft insulation",
+    "Pitched, limited insulation (assumed)",
+    "Pitched, insulated (assumed)",
+]
+
+SOCIAL_TENURES = ["Rented (social)", "rental (social)"]  # "tenure" values removed by filter_and_prepare_epcs
+
+
+def process_postcode_epcs(postcode, client):
+    """Return one EPC row per UPRN (address when the UPRN is null) for `postcode`: the greatest lodgement-date."""
+    # Build the URL with string operations: os.path.join would use "\\" as the separator on Windows
+    url = client.domestic.host.rstrip("/") + "/search?" + urlencode({"size": 1000})
+    response = client.domestic.call(method="get", url=url, params={"postcode": postcode.strip()})
+    # A missing "rows" entry and an empty one both mean there is nothing to process
+    if "rows" not in response or not response["rows"]:
+        logger.warning("No EPCs found for postcode %s", postcode)
+        return pd.DataFrame()
+    postcode_epcs = pd.DataFrame(response["rows"])
+    # Fall back to the address as the de-duplication key when the UPRN is null
+    postcode_epcs["uprn"] = np.where(
+        pd.isnull(postcode_epcs["uprn"]),
+        postcode_epcs["address"],
+        postcode_epcs["uprn"]
+    )
+    postcode_epcs = postcode_epcs.sort_values("lodgement-date", ascending=False)
+    return postcode_epcs.drop_duplicates("uprn", keep="first")
+
+
+def filter_and_prepare_epcs(epcs):
+    """Flag cavity-wall and solar/loft candidates on `epcs`, then keep the flagged, non-social-tenure rows."""
+    is_cavity_wall = epcs["walls-description"].isin(CAVITY_WALL_DESCRIPTIONS)
+    sap_score = epcs["current-energy-efficiency"].astype(int)
+    # The two flag columns are added to the frame that was passed in
+    epcs["Is Cavity Property"] = is_cavity_wall & (sap_score <= 72)
+
+    listed_roof = epcs["roof-description"].isin(ROOF_DESCRIPTIONS)
+    no_solar = epcs["photo-supply"].isin(["0", "", "0.0"])
+    epcs["Solar and Loft"] = listed_roof & no_solar & (sap_score <= 68)
+
+    candidates = epcs[epcs["Is Cavity Property"] | epcs["Solar and Loft"]]
+    # Rows whose tenure is one of SOCIAL_TENURES are removed
+    return candidates[~candidates["tenure"].isin(SOCIAL_TENURES)]
+
+
+def rename_and_add_columns(epcs):
+    """
+    Reduce `epcs` to the reported columns, rename them for display and append the derived columns.
+
+    :param epcs: DataFrame containing every column named in `reported_columns` below.
+    :return: DataFrame with the display names plus the estimated geometry and roof insulation columns.
+    """
+    reported_columns = [
+        "uprn",
+        "address",
+        "postcode",
+        "property-type",
+        "built-form",
+        "inspection-date",
+        "current-energy-rating",
+        "current-energy-efficiency",
+        "roof-description",
+        "walls-description",
+        "transaction-type",
+        "secondheat-description",
+        "total-floor-area",
+        "construction-age-band",
+        "floor-height",
+        "number-habitable-rooms",
+        "mainheat-description",
+        "energy-consumption-current",  # kwh/m2
+        "tenure",
+        "Is Cavity Property",
+        "Solar and Loft",
+    ]
+    display_names = {
+        "address": "Address",
+        "postcode": "Postcode",
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC",
+        "energy-consumption-current": "Heat Demand (kWh/m2)",
+        "tenure": "Tenure",
+    }
+    epcs = epcs[reported_columns].rename(columns=display_names)
+
+    epcs["Number of Habitable Rooms"] = epcs["Number of Habitable Rooms"].astype(int)
+    epcs["Property Floor Area"] = epcs["Property Floor Area"].astype(float)
+
+    def floors_for(row):
+        if pd.notnull(row["Property Type"]):
+            return estimate_number_of_floors(row["Property Type"])
+        return None
+
+    def perimeter_for(row):
+        floors = row["Estimated Number of Floors"]
+        return estimate_perimeter(row["Property Floor Area"] / floors, row["Number of Habitable Rooms"] / floors)
+
+    def wall_area_for(row):
+        floor_height = float(row["Property Floor Height"]) if row["Property Floor Height"] else 2.4  # falsy -> 2.4
+        return estimate_external_wall_area(
+            row["Estimated Number of Floors"], floor_height, row["Estimated Perimeter (m)"], row["Archetype"]
+        )
+
+    def roof_thickness_for(row):
+        if pd.notnull(row["Roof Construction"]):
+            return RoofAttributes(description=row["Roof Construction"]).process()["insulation_thickness"]
+        return None
+
+    # Later columns read the earlier ones, so the order of these assignments matters
+    epcs["Estimated Number of Floors"] = epcs.apply(floors_for, axis=1)
+    epcs["Estimated Perimeter (m)"] = epcs.apply(perimeter_for, axis=1)
+    epcs["Estimated Heat Loss Perimeter (m2)"] = epcs.apply(wall_area_for, axis=1)
+    epcs["Roof Insulation Thickness"] = epcs.apply(roof_thickness_for, axis=1)
+    return epcs
+
+
+def main():
+    """
+    This application is used to identify additional units that are private rentals or owner occupied that can be
+    included in the route marches
+
+    Required inputs are the following:
+    - An excel file that contains one or many tabs that include the addresses to be visited
+    """
+
+    # This should be set:
+    output_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/PRS and OO properties - WC 11.11.2024.xlsx"
+    )
+    client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+
+    # The context manager saves and closes the workbook even when a tab fails part-way through
+    with pd.ExcelWriter(output_filepath, engine="xlsxwriter") as writer:
+        for config in CONFIG:
+            logger.info("Processing %s", config["tab"])
+            # Read in the data
+            route_march_addresses = pd.read_excel(
+                config["filepath"],
+                sheet_name=config["tab"],
+                engine="openpyxl"
+            )
+
+            # Blank cells come through as NaN, which has no string methods, so they are dropped
+            postcodes = route_march_addresses[config["postcode_column"]].dropna().unique()
+
+            epcs = [process_postcode_epcs(postcode, client) for postcode in tqdm(postcodes)]
+            epcs = [postcode_epcs for postcode_epcs in epcs if not postcode_epcs.empty]
+            if not epcs:
+                # pd.concat raises on an empty list, so a tab with no EPCs at all is skipped
+                logger.warning("No EPCs found for any postcode in %s", config["tab"])
+                continue
+
+            # Concatenate all postcodes' data and filter it
+            epcs = pd.concat(epcs)
+            epcs = filter_and_prepare_epcs(epcs)
+            epcs = rename_and_add_columns(epcs)
+
+            sheet_name = config["tab"][:31]  # Excel sheet names max length of 31 characters
+            epcs.to_excel(writer, sheet_name=sheet_name, index=False)
+
+    logger.info("Data successfully written to %s", output_filepath)
--- a/etl/route_march/oo_prs_additional_units/requirements.txt
+++ b/etl/route_march/oo_prs_additional_units/requirements.txt
@ -0,0 +1,10 @@
+openpyxl
+epc-api-python==1.0.2
+numpy==2.1.2
+pandas==2.2.3
+usaddress==0.5.11
+fuzzywuzzy==0.18.0
+boto3==1.35.44
+python-dotenv
+tqdm
+xlsxwriter
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@ -9,7 +9,8 @@ from etl.xml_survey_extraction.pcdb import heating_data
 PROPERTY_TYPE_LOOKUP = {
    "0": "House",
    "House": "House",
-    "2": "Flat"
+    "2": "Flat",
+    "3": "Maisonette",
 }


@ -107,11 +108,13 @@ class XmlParser:

    BUILT_FORM_MAP = {
        "1": "Detached",
+        "2": "Semi-Detached",
        "3": "End-Terrace",
        "4": "Mid-Terrace",
    }

    GLAZED_AREA_MAP = {
+        "2": "More than Typical",
        "4": "Much More Than Typical"
    }

@ -120,7 +123,9 @@ class XmlParser:
    }

    TRANSACTION_TYPE_MAP = {
-        "13": "ECO assessment"
+        "5": "Rented (social)",
+        "13": "ECO assessment",
+        "14": "Stock condition survey",
    }

    TENURE_MAP = {
@ -131,7 +136,8 @@ class XmlParser:

    TARIFF_MAP = {
        "1": "Dual",
-        "2": "Single"
+        "2": "Single",
+        "3": "Unknown"
    }

    def __init__(self, file, filekey, surveyor_company, uprn=None):
@ -400,8 +406,13 @@ class XmlParser:
        ]

        wall_areas = sum([float(f["heat_loss_perimeter"]) * float(f["room_height"]) for f in main_dwelling_floors])
-        window_areas = sum([float(w["window_area"]) for w in main_dwelling_windows])
-        return wall_areas - window_areas
+        window_areas = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
+        if not window_areas:
+            # We discount 10% of the wall area
+            insulation_wall_area = wall_areas * 0.9
+        else:
+            insulation_wall_area = wall_areas - sum(window_areas)
+        return insulation_wall_area

    def extract_additional_data(self):

@ -415,7 +426,8 @@ class XmlParser:
        main_dwelling_windows = [w for w in self.windows if w["window_location"] == "0"]

        number_of_windows = len(main_dwelling_windows)
-        windows_area = sum([float(w["window_area"]) for w in main_dwelling_windows])
+        windows_area = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
+        windows_area = sum(windows_area) if windows_area else None

        boolean_lookup = {
            "true": True,
@ -427,6 +439,7 @@ class XmlParser:
        cylinder_insulation_type = {
            None: "",
            "1": "Foam",
+            "2": "Jacket"
        }

        cylinder_insulation_thickness = int(
@ -461,7 +474,7 @@ class XmlParser:
            "cylinder_thermostat": cylinder_thermostat,
            "main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
            "number_of_windows": int(number_of_windows),
-            "windows_area": float(windows_area),
+            "windows_area": float(windows_area) if windows_area is not None else windows_area,
        }

    def get_node_value(self, tag_name):
@ -769,9 +782,10 @@ class XmlParser:
        :return:
        """

-        sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
-
        glazing_type_lookup = {
+            "ND": "Single glazing",
+            "1": "double glazing installed before 2002",
+            "2": "double glazing installed during or after 2002",
            "3": "double glazing, unknown install date",
            "5": "Single glazing",
        }
@ -787,6 +801,40 @@ class XmlParser:
            "8": "North West"
        }

+        sap_windows = self.xml.getElementsByTagName("SAP-Windows")
+
+        if not sap_windows:
+            # We look for Multi-Glazed-Proportion
+            multiple_glazing_type = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
+                "Multiple-Glazing-Type"
+            )[0].firstChild.nodeValue
+
+            pvc_frame = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
+                "PVC-Window-Frames"
+            )
+
+            pvc_frame = pvc_frame[0].firstChild.nodeValue if pvc_frame else None
+
+            multple_glazed_proportion = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
+                "Multiple-Glazed-Proportion"
+            )[0].firstChild.nodeValue
+
+            self.windows = [
+                {
+                    "window_location": "0",
+                    "window_area": None,
+                    "window_type": None,
+                    "glazing_type": glazing_type_lookup[multiple_glazing_type],
+                    "pvc_frame": pvc_frame,
+                    "glazing_gap": None,
+                    "orientation": None,
+                    "multple_glazed_proportion": multple_glazed_proportion
+                }
+            ]
+            return
+
+        sap_windows = sap_windows[0].getElementsByTagName("SAP-Window")
+
        self.windows = [
            self._parse_windows_content(
                window=window,
--- a/input_property_list.csv
+++ b/input_property_list.csv
@ -1,12 +0,0 @@
-address,postcode,Notes,,,,
-28 Distillery Wharf,W6 9bf,,,,,
-Flat 14 Godley V C House,E2 0LP,,,,,
-49 Elderfield Road,E5 0LF,,,,,
-26 Stanhope Road,N6 5NG,,,,,
-Flat 3 Frederick Building,N1 4BD,,,,,
-Flat 4 Frederick Building,N1 4BD,,,,,
-"Flat 28, 22 Adelina Grove",E1 3BX,,,,,
-"Flat 39, 239 Long Lane",SE1 4PT,,,,,
-"1, Westview, Somerby",LE14 2QH,This property has an unfilled cavity,,,,
-"59, Ashdale",CM23 4EB,This property has a partially filled cavity,,,,
-88 Cleveland Avenue,DL3 7BE,This property has a filled cavity,,,,
--- a/keyzy_pilot.csv
+++ b/keyzy_pilot.csv
@ -1,3 +0,0 @@
-address,postcode,Notes,,,,
-2 South Terrace,NN1 5JY,,,,,
-25 Albert Street,PO12 4TY,,,,,
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@ -37,22 +37,25 @@ MCS_SOLAR_PV_COST_DATA = {
    "average_cost_per_kwh-Northern Ireland": 1347,
 }

+# Installers are now working with 435 watt panels
+PANEL_SIZE = 0.435
+
 INSTALLER_SOLAR_COSTS = [
-    {'n_panels': 4, 'array_kwp': 1.6, 'cost': 3040.00, 'installer': 'CEG'},
-    {'n_panels': 5, 'array_kwp': 2.1, 'cost': 3201.00, 'installer': 'CEG'},
-    {'n_panels': 6, 'array_kwp': 2.5, 'cost': 3363.00, 'installer': 'CEG'},
-    {'n_panels': 7, 'array_kwp': 2.9, 'cost': 3524.00, 'installer': 'CEG'},
-    {'n_panels': 8, 'array_kwp': 3.3, 'cost': 3686.00, 'installer': 'CEG'},
-    {'n_panels': 9, 'array_kwp': 3.7, 'cost': 3847.00, 'installer': 'CEG'},
-    {'n_panels': 10, 'array_kwp': 4.1, 'cost': 4009.00, 'installer': 'CEG'},
-    {'n_panels': 11, 'array_kwp': 4.5, 'cost': 4170.00, 'installer': 'CEG'},
-    {'n_panels': 12, 'array_kwp': 4.9, 'cost': 4332.00, 'installer': 'CEG'},
-    {'n_panels': 13, 'array_kwp': 5.3, 'cost': 4835.00, 'installer': 'CEG'},
-    {'n_panels': 14, 'array_kwp': 5.7, 'cost': 5015.00, 'installer': 'CEG'},
-    {'n_panels': 15, 'array_kwp': 6.2, 'cost': 5176.00, 'installer': 'CEG'},
-    {'n_panels': 16, 'array_kwp': 6.6, 'cost': 5338.00, 'installer': 'CEG'},
-    {'n_panels': 17, 'array_kwp': 7.0, 'cost': 5500.00, 'installer': 'CEG'},
-    {'n_panels': 18, 'array_kwp': 7.4, 'cost': 6021.00, 'installer': 'CEG'}
+    {'n_panels': 4, 'array_kwp': 4 * PANEL_SIZE, 'cost': 4089.25, 'installer': 'CEG'},
+    {'n_panels': 5, 'array_kwp': 5 * PANEL_SIZE, 'cost': 4242.48, 'installer': 'CEG'},
+    {'n_panels': 6, 'array_kwp': 6 * PANEL_SIZE, 'cost': 4395.71, 'installer': 'CEG'},
+    {'n_panels': 7, 'array_kwp': 7 * PANEL_SIZE, 'cost': 4548.94, 'installer': 'CEG'},
+    {'n_panels': 8, 'array_kwp': 8 * PANEL_SIZE, 'cost': 4702.17, 'installer': 'CEG'},
+    {'n_panels': 9, 'array_kwp': 9 * PANEL_SIZE, 'cost': 4855.41, 'installer': 'CEG'},
+    {'n_panels': 10, 'array_kwp': 10 * PANEL_SIZE, 'cost': 5010.95, 'installer': 'CEG'},
+    {'n_panels': 11, 'array_kwp': 11 * PANEL_SIZE, 'cost': 5166.49, 'installer': 'CEG'},
+    {'n_panels': 12, 'array_kwp': 12 * PANEL_SIZE, 'cost': 5322.04, 'installer': 'CEG'},
+    {'n_panels': 13, 'array_kwp': 13 * PANEL_SIZE, 'cost': 5657.6, 'installer': 'CEG'},
+    {'n_panels': 14, 'array_kwp': 14 * PANEL_SIZE, 'cost': 5993.16, 'installer': 'CEG'},
+    {'n_panels': 15, 'array_kwp': 15 * PANEL_SIZE, 'cost': 6328.71, 'installer': 'CEG'},
+    {'n_panels': 16, 'array_kwp': 16 * PANEL_SIZE, 'cost': 6483.33, 'installer': 'CEG'},
+    {'n_panels': 17, 'array_kwp': 17 * PANEL_SIZE, 'cost': 6637.95, 'installer': 'CEG'},
+    {'n_panels': 18, 'array_kwp': 18 * PANEL_SIZE, 'cost': 6792.57, 'installer': 'CEG'}
 ]
 # This is the maximum number of panels that we have a cost from the installers for
 INSTALLER_MAX_PANELS = 18
@ -62,11 +65,11 @@ INSTALLER_MAX_PANELS = 18
 INSTALLER_SOLAR_PV_INVERTER_COST = 7500
 INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST = 500  # Just a rough guess to labour costs

-INSTALLER_SCAFFOLDING_COSTS = [
-    {'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'},
-    {'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'},
-    {'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'}
-]
+# INSTALLER_SCAFFOLDING_COSTS = [
+#     {'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'},
+#     {'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'},
+#     {'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'}
+# ]

 # This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average,
 # to be conservative
@ -101,10 +104,10 @@ INSTALLER_ASHP_COSTS = [
 BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500

 INSTALLER_SOLAR_BATTERY_COSTS = [
-    {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 2700.00, 'installer': 'CEG'},
-    {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'},
-    {'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'},
-    {'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'}
+    {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 3769.89, 'installer': 'JJC'},
+    # {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'},
+    # {'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'},
+    # {'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'}
 ]

 # This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
@ -149,7 +152,7 @@ CONDENSING_BOILER_COSTS = {
 ELECTRIC_BOILER_COSTS = 1800

 # Assumes 1 hours to remove each heater (including re-decorating)
-ROOM_HEATER_REMOVAL_COST = 50
+ROOM_HEATER_REMOVAL_COST = 25
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3

 # This is a cost quoted by Jim for a system flush - existig system will run more efficiently
@ -190,6 +193,8 @@ class Costs:
    # fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
    IWI_CONTINGENCY = 0.2

+    # For air source heat pumps, we inflate the assumed cost by quite a bit to account for design and installation
+    ASHP_CONTINGENCY = 0.35
    # Where there is more uncertainty, a higher contingency rate is used
    HIGH_RISK_CONTINGENCY = 0.2
    # When there is less uncertainty, a lower contingency rate is used
@ -234,6 +239,13 @@ class Costs:
        if self.region is None:
            # Try and grab using the local-authority-label
            self.region = county_to_region_map.get(self.property.data["local-authority-label"], None)
+
+            if self.region is None:
+                # Try and get the region after converting the keys to lower
+                self.region = {
+                    k.lower(): v for k, v in county_to_region_map.items()
+                }.get(self.property.data["local-authority-label"].lower(), None)
+
            if self.region is None:
                raise ValueError("Region not found in county map")

@ -719,8 +731,9 @@ class Costs:
            "labour_days": labour_days
        }

+    @classmethod
    def solar_pv(
-        self,
+        cls,
        n_panels: int | float,
        has_battery: bool = False,
        array_cost=None,
@ -758,33 +771,28 @@ class Costs:
        else:
            system_cost = [c for c in INSTALLER_SOLAR_COSTS if c["n_panels"] == n_panels][0]["cost"]

-        total_cost = array_cost if array_cost is not None else system_cost
+        subtotal = array_cost if array_cost is not None else system_cost

        if has_battery:
            battery_cost = [c for c in INSTALLER_SOLAR_BATTERY_COSTS if c["capacity_kwh"] == battery_kwh][0]["cost"]
-            total_cost += battery_cost
-
-        scaffolding_cost = [c for c in INSTALLER_SCAFFOLDING_COSTS if c["stories"] == n_floors][0]["cost"]
-        total_cost += scaffolding_cost
+            subtotal += battery_cost

        if needs_inverter:
-            total_cost += INSTALLER_SOLAR_PV_INVERTER_COST
+            subtotal += INSTALLER_SOLAR_PV_INVERTER_COST
            # We also add an additional labour cost
-            total_cost += INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST
+            subtotal += INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST

-        # We add an additional cost for scaffolding
-
-        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
-
-        vat = total_cost - subtotal_before_vat
+        # Solar doesn't have VAT but we add a high risk contingency
+        # to account for design variation that we see in practice
+        total_cost = subtotal * (1 + cls.HIGH_RISK_CONTINGENCY)

        # Labour hours are based on estimates from online research but an average team seems to consist of 3 people
        # and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 48 hours of
        # labour
        return {
            "total": total_cost,
-            "subtotal": subtotal_before_vat,
-            "vat": vat,
+            "subtotal": subtotal,
+            "vat": 0,
            "labour_hours": 48,
            "labour_days": 2,
        }
@ -1154,7 +1162,6 @@ class Costs:
        pump. This cost will include the boiler upgrade scheme grant

        """
-
        # This is the average cost of a project, we'll add some additional contingency

        if ashp_size is None:
@ -1163,9 +1170,10 @@ class Costs:
            cost = [x for x in INSTALLER_ASHP_COSTS if x][0]["cost"]

        # We add some contingency since there are additional costs such as resizing radiators, that could be required
-        total_cost = cost * (1 + self.CONTINGENCY)
-        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
-        vat = total_cost - subtotal_before_vat
+        subtotal = cost * (1 + self.ASHP_CONTINGENCY)
+        # The costs from installers exclude VAT
+        vat = subtotal * self.VAT_RATE
+        total_cost = subtotal + vat

        # We assume 5 days installation
        labour_days = 5
@ -1173,7 +1181,7 @@ class Costs:

        return {
            "total": total_cost,
-            "subtotal": subtotal_before_vat,
+            "subtotal": subtotal,
            "vat": vat,
            "labour_hours": labour_hours,
            "labour_days": labour_days,
--- a/recommendations/DraughtProofingRecommendations.py
+++ b/recommendations/DraughtProofingRecommendations.py
@ -26,6 +26,9 @@ class DraughtProofingRecommendations:
        if not draught_proofing_recommendation_config:
            return

+        # Default cost assumes £50 per window, based on Checkatrade
+        cost = draught_proofing_recommendation_config.get("cost", self.property.number_of_windows * 50)
+
        description = (
            "Draught proof doors and windows to improve energy efficiency" if
            not draught_proofing_recommendation_config.get("description")
@ -48,7 +51,7 @@ class DraughtProofingRecommendations:
                "kwh_savings": 0,
                "co2_equivalent_savings": 0,
                "energy_cost_savings": 0,
-                "total": draught_proofing_recommendation_config["cost"],
+                "total": cost,
                # We use a very simple and rough estimate of 4 hours per unit
                "labour_hours": draught_proofing_recommendation_config.get("labour_hours", 8),
                "labour_days": draught_proofing_recommendation_config.get("labour_days", 1),  # Assume 8 hour day
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@ -145,7 +145,9 @@ class FloorRecommendations(Definitions):
            )
            return

-        raise NotImplementedError("Implement me!")
+        # In this case, we have no recommendation to make. E.g., if we have a solid floor property
+        # but solid floor insulation has been excluded as a measure, we get here
+        return

    @staticmethod
    def _make_floor_description(material):
@ -172,6 +174,11 @@ class FloorRecommendations(Definitions):

        insulation_materials = pd.DataFrame(insulation_materials)

+        non_invasive_recs = next(
+            (r for r in self.property.non_invasive_recommendations if
+             r["type"] == insulation_materials["type"].values[0]), {}
+        )
+
        lowest_selected_u_value = None
        for _, insulation_material_group in insulation_materials.groupby("description"):

@ -217,6 +224,9 @@ class FloorRecommendations(Definitions):
                    else:
                        raise NotImplementedError("Implement me!")

+                    sap_points = non_invasive_recs.get("sap_points", None)
+                    survey = non_invasive_recs.get("survey", False)
+
                    floor_ending_config = FloorAttributes(new_description).process()
                    floor_simulation_config = check_simulation_difference(
                        new_config=floor_ending_config, old_config=self.property.floor, prefix="floor_"
@ -245,7 +255,8 @@ class FloorRecommendations(Definitions):
                            "description": self._make_floor_description(material),
                            "starting_u_value": u_value,
                            "new_u_value": new_u_value,
-                            "sap_points": None,
+                            "sap_points": sap_points,
+                            "survey": survey,
                            "already_installed": already_installed,
                            "simulation_config": simulation_config,
                            "description_simulation": {
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@ -12,7 +12,7 @@ class HeatingControlRecommender:

        self.recommendation = []

-    def recommend(self, heating_description, description_prefix="", description_suffix=""):
+    def recommend(self, heating_description, phase, description_prefix="", description_suffix=""):

        # TODO: Many of these functions are quite similar. We can possibly create a single wrapper function that
        #  takes in the heating description and the description prefix/suffix, and then creates the appropriate
@ -23,32 +23,32 @@ class HeatingControlRecommender:
        # This first iteration of the recommender will provide very basic recommendation
        # We recommend heating controls based on the main heating system
        if heating_description in ["Room heaters, electric"]:
-            self.recommend_room_heaters_electric_controls()
+            self.recommend_room_heaters_electric_controls(phase=phase)
            return

        if heating_description in ["Electric storage heaters", "Electric storage heaters, radiators"]:
-            self.recommend_high_heat_retention_controls(description_prefix=description_prefix)
+            self.recommend_high_heat_retention_controls(description_prefix=description_prefix, phase=phase)
            return

        if heating_description in ["Boiler and radiators, mains gas"]:
            # We can recommend roomstat programmer trvs
-            self.recommend_roomstat_programmer_trvs(description_suffix=description_suffix)
+            self.recommend_roomstat_programmer_trvs(description_suffix=description_suffix, phase=phase)
            # We can also recommend time and temperature zone controls
-            self.recommend_time_temperature_zone_controls(description_suffix=description_suffix)
+            self.recommend_time_temperature_zone_controls(description_suffix=description_suffix, phase=phase)

            return

        if heating_description in ["Boiler and radiators, electric"]:
-            self.recommend_roomstat_programmer_trvs()
+            self.recommend_roomstat_programmer_trvs(phase=phase)
            return

        if heating_description in ["Air source heat pump, radiators, electric"]:
            # For an ASHP, we can recommend time and temperature zone controls, as well as programmer, trvs and a bypass
            # which are common configurations for ASHPs
-            self.recommend_time_temperature_zone_controls()
+            self.recommend_time_temperature_zone_controls(phase=phase)
            # self.recommend_programmer_trvs_bypass()

-    def recommend_room_heaters_electric_controls(self):
+    def recommend_room_heaters_electric_controls(self, phase):
        """
        If the home has Room heaters, electric, we start by identifying potential heating controls that could
        be upgraded, that would provide a practical impact. This will be the least invasive improvement.
@ -88,6 +88,9 @@ class HeatingControlRecommender:

            self.recommendation.append(
                {
+                    "phase": phase,
+                    "type": "heating",
+                    "measure_type": "programmer_appliance_thermostat",
                    "description": "upgrade heating controls to Programmer and Appliance or Smart Thermostats",
                    **self.costs.programmer_and_appliance_thermostat(has_programmer=has_programmer),
                    "simulation_config": simulation_config
@ -97,7 +100,7 @@ class HeatingControlRecommender:
        # We don't implement any other recommendations right now
        return

-    def recommend_high_heat_retention_controls(self, description_prefix=""):
+    def recommend_high_heat_retention_controls(self, phase, description_prefix=""):
        """
        When applicable, we recommend upgrading the heating controls to high heat retention controls. This is a
        specific type of control system that is designed to work with electric storage heaters. It is a more
@ -133,6 +136,9 @@ class HeatingControlRecommender:

        self.recommendation.append(
            {
+                "phase": phase,
+                "type": "heating",
+                "measure_type": "celect_type_controls",
                "description": "Upgrade heating controls to High Heat Retention Storage Heater Controls",
                **self.costs.celect_type_controls(),
                "simulation_config": simulation_config,
@ -143,7 +149,7 @@ class HeatingControlRecommender:
        # We don't implement any other recommendations right now
        return

-    def recommend_roomstat_programmer_trvs(self, description_suffix=""):
+    def recommend_roomstat_programmer_trvs(self, phase, description_suffix=""):
        """
        If the home has a boiler and radiators, mains gas, we start by identifying potential heating controls that could
        be upgraded, that would provide a practical impact.
@ -208,15 +214,16 @@ class HeatingControlRecommender:

        description = "Upgrade heating controls to Room thermostat, programmer and TRVs"

-        already_installed = "heating_control" in self.property.already_installed
+        already_installed = "roomstat_programmer_trvs" in self.property.already_installed
        if already_installed:
            cost_result = override_costs(cost_result)
            description = "Heating controls have already been upgraded, no further action needed."

        self.recommendation.append(
            {
-                "type": "heating_control",
+                "type": "heating",
                "measure_type": "roomstat_programmer_trvs",
+                "phase": phase,
                "parts": [],
                "description": description,
                **cost_result,
@ -231,7 +238,7 @@ class HeatingControlRecommender:

        return

-    def recommend_time_temperature_zone_controls(self, description_suffix=""):
+    def recommend_time_temperature_zone_controls(self, phase, description_suffix=""):
        """
        If the home has a boiler, we can recommend time and temperature zone controls. This is a more advanced
        and more efficient control system than the standard controls that come with a boiler. However, it may come
@ -282,14 +289,15 @@ class HeatingControlRecommender:
            "temperature zone control)"
        )

-        already_installed = "heating_control" in self.property.already_installed
+        already_installed = "time_temperature_zone_control" in self.property.already_installed
        if already_installed:
            cost_result = override_costs(cost_result)
            description = "Heating controls have already been upgraded, no further action needed."

        self.recommendation.append(
            {
-                "type": "heating_control",
+                "type": "heating",
+                "phase": phase,
                "measure_type": "time_temperature_zone_control",
                "parts": [],
                "description": description,
@ -335,14 +343,15 @@ class HeatingControlRecommender:

        description = "Install a Bypass valve, TRVs and a Programmer"

-        already_installed = "heating_control" in self.property.already_installed
+        already_installed = "programmer_trvs_bypass" in self.property.already_installed
        if already_installed:
            cost_result = override_costs(cost_result)
            description = "Heating controls have already been upgraded, no further action needed."

        self.recommendation.append(
            {
-                "type": "heating_control",
+                "type": "heating",
+                "measure_type": "programmer_trvs_bypass",
                "parts": [],
                "description": description,
                **cost_result,
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@ -65,7 +65,6 @@ class HeatingRecommender:
        self.costs = Costs(self.property)

        self.heating_recommendations = []
-        self.heating_control_recommendations = []

        self.has_electric_heating_description = (
            self.property.main_heating["has_electric"] or self.property.main_heating["has_electricaire"]
@ -259,7 +258,6 @@ class HeatingRecommender:
            "ashp_only_heating_recommendation", False
        )
        self.heating_recommendations = []
-        self.heating_control_recommendations = []
        # This first iteration of the recommender will provide very basic recommendation
        # We recommend heating controls based on the main heating system

@ -302,7 +300,6 @@ class HeatingRecommender:
            self.recommend_air_source_heat_pump(
                phase=phase,
                has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
-
            )

        return
@ -360,7 +357,7 @@ class HeatingRecommender:
            }

            controls_recommender = HeatingControlRecommender(self.property)
-            controls_recommender.recommend(heating_description="Boiler and radiators, electric")
+            controls_recommender.recommend(heating_description="Boiler and radiators, electric", phase=phase)

            self.heating_recommendations.extend([boiler_recommendation] + controls_recommender.recommendation)
            return
@ -453,7 +450,7 @@ class HeatingRecommender:
        ), {})

        controls_recommender = HeatingControlRecommender(self.property)
-        controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
+        controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric", phase=phase)
        ashp_size = self.size_heat_pump()

        ashp_costs = self.costs.air_source_heat_pump(ashp_size)
@ -631,7 +628,8 @@ class HeatingRecommender:
        heating_controls_only,
        system_change,
        system_type,
-        measure_type
+        measure_type,
+        non_intrusive_recommendation=None
    ):
        """
        Given a recommendation for heating controls, and a recommendation for the heating system, we combine the two
@ -649,8 +647,13 @@ class HeatingRecommender:
        :param system_type: The type of heating system we are recommending
        :param measure_type: The type of measure we are recommending - more granular than the "type" field, allowing us
        to distinguish between different types of heating recommendations
+        :param non_intrusive_recommendation: A non-intrusive recommendation, which may specify the number of SAP points
+                                             or a cost for this recommendation
        """

+        if non_intrusive_recommendation is None:
+            non_intrusive_recommendation = {}
+
        # We produce recommendations with & without heating controls
        # We will also produce a recommendation for heating controls only
        heating_controls_switch = [True, False] if controls_recommendations else [False]
@ -698,13 +701,14 @@ class HeatingRecommender:
                "description": recommendation_description,
                "starting_u_value": None,
                "new_u_value": None,
-                "sap_points": None,
+                "sap_points": non_intrusive_recommendation.get("sap_points"),
                "already_installed": already_installed,
                **total_costs,
                "simulation_config": recommendation_simulation_config,
                "description_simulation": recommendation_description_simulation,
                # We insert the heating system type here
-                "system_type": system_type
+                "system_type": system_type,
+                "survey": non_intrusive_recommendation.get("survey", False)
            }

            output.append(recommendation)
@ -798,7 +802,9 @@ class HeatingRecommender:
                description_prefix = ""

            controls_recommender.recommend(
-                heating_description="Electric storage heaters", description_prefix=description_prefix
+                heating_description="Electric storage heaters",
+                description_prefix=description_prefix,
+                phase=phase
            )

        has_hhr = self.is_hhr_already_installed()
@ -807,6 +813,13 @@ class HeatingRecommender:
            # No recommendation needed
            return

+        # We check if there is a high heat retention non-intrusive recommendation
+        non_intrusive_recommendation = next(
+            (r for r in self.property.non_invasive_recommendations if
+             r["type"] == "high_heat_retention_storage_heater"),
+            {}
+        )
+
        # We check if the property has dual heating in place with a boiler and storage heaters
        if self.dual_heating:
            new_heating_description = self.DUAL_HEATING_DESCRIPTIONS[
@ -838,6 +851,8 @@ class HeatingRecommender:
        else:
            heating_simulation_config["mainheat_energy_eff_ending"] = self.property.data["mainheat-energy-eff"]

+        # TODO: We possibly shouldn't touch the hot water energy efficiency if we aren't recommending dual immersion;
+        #       we'll keep this for the moment though
        if self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor"]:
            heating_simulation_config["hot_water_energy_eff_ending"] = "Average"
        else:
@ -895,7 +910,8 @@ class HeatingRecommender:
            heating_controls_only=heating_controls_only,
            system_change=system_change,
            system_type="high_heat_retention_storage_heater",
-            measure_type="high_heat_retention_storage_heater"
+            measure_type="high_heat_retention_storage_heater",
+            non_intrusive_recommendation=non_intrusive_recommendation
        )
        if _return:
            return recommendations
@ -978,9 +994,13 @@ class HeatingRecommender:
        # We check if there's a mains connection and the hot water is inefficient, as this will improve with a boiler
        has_inefficient_water = (
            self.property.data["mains-gas-flag"] and
-            self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"]
+            self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor"]
        )

+        non_invasive_recommendation = next((
+            r for r in self.property.non_invasive_recommendations if r["type"] == "boiler_upgrade"
+        ), {})
+
        if has_inefficient_space_heating or has_inefficient_water:
            boiler_size = self.estimate_boiler_size(
                property_type=self.property.data["property-type"],
@ -1079,12 +1099,13 @@ class HeatingRecommender:
                "description": description,
                "starting_u_value": None,
                "new_u_value": None,
-                "sap_points": None,
+                "sap_points": non_invasive_recommendation.get("sap_points", None),
                "already_installed": already_installed,
                "simulation_config": simulation_config,
                "description_simulation": description_simulation,
                **boiler_costs,
                "system_type": "boiler_upgrade",
+                "survey": non_invasive_recommendation.get("survey", None)
            }

        # We recommend the heating controls
@ -1098,10 +1119,10 @@ class HeatingRecommender:
            description_suffix = ""
        controls_recommender.recommend(
            heating_description="Boiler and radiators, mains gas",
-            description_suffix=description_suffix
+            description_suffix=description_suffix,
+            phase=recommendation_phase
        )
        # We may have 2 recommendations from the heating controls
-
        if not controls_recommender.recommendation and not boiler_recommendation:
            return

@ -1111,6 +1132,8 @@ class HeatingRecommender:

        if system_change:
            # We combine the heating and controls recommendations, in the case of a system change
+            # If this is true, we set SAP points to None and survey to False for the boiler recommendation
+
            combined_recommendations = []
            for controls_recommendation in controls_recommender.recommendation:
                combined_recommendation = self.combine_heating_and_controls(
@ -1137,10 +1160,6 @@ class HeatingRecommender:
            # 3) Heating controls only
            # But they are options that are not mutually exclusive
            # So, we actually set heating controls as a heating recommendation
-            for recommendation in controls_recommender.recommendation:
-                recommendation["phase"] = recommendation_phase
-                # recommendation["type"] = "heating"
-
-            self.heating_control_recommendations.extend(controls_recommender.recommendation)
+            self.heating_recommendations.extend(controls_recommender.recommendation)

        return
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@ -20,26 +20,66 @@ class HotwaterRecommendations:
        :return:
        """
        # Reset the recommendations
+        recommendations_phase = phase
+
        self.recommendations = []
+        non_invasive_recommendations = self.property.non_invasive_recommendations
+        if non_invasive_recommendations:
+            measures = [
+                r["type"] for r in non_invasive_recommendations if
+                r["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]
+            ]
+
+            for m in measures:
+                non_invasive_rec = [
+                    r for r in non_invasive_recommendations if r["type"] == m
+                ][0]
+                if m == "hot_water_tank_insulation":
+                    # We need to be able to stack these recommendations
+                    self.recommend_tank_insulation(
+                        phase=recommendations_phase,
+                        sap_points=non_invasive_rec["sap_points"],
+                        survey=non_invasive_rec["survey"],
+                    )
+
+                    recommendations_phase += 1
+                elif m == "cylinder_thermostat":
+                    self.recommend_cylinder_thermostat(
+                        phase=recommendations_phase,
+                        sap_points=non_invasive_rec["sap_points"],
+                        survey=non_invasive_rec["survey"],
+                    )
+                    recommendations_phase += 1

        # This first iteration of the recommender will provide very basic recommendation
        # We recommend heating controls based on the main heating system

-        # If there is no system present, but access to the mains, we
+        if self.property.hotwater["clean_description"] == "Gas boiler/circulator, no cylinder thermostat":
+            # Handle this case specifically:
+            self.recommend_cylinder_thermostat_gas_boiler_circulator(phase=recommendations_phase)
+            return
+
+            # If there is no system present, but access to the mains, we
+
+        has_tank_recommendation = [r for r in self.recommendations if r["type"] == "hot_water_tank_insulation"]

        if (
            (self.property.hotwater["heater_type"] in ["electric immersion"]) &
            (self.property.data["hot-water-energy-eff"] == "Very Poor") &
-            (self.property.hotwater["no_system_present"] is None)
+            (self.property.hotwater["no_system_present"] is None) &
+            (len(has_tank_recommendation) == 0)
        ):
-            self.recommend_tank_insulation(phase=phase)
+            self.recommend_tank_insulation(phase=recommendations_phase)
            return

-        if self.property.hotwater["clean_description"] == "From main system, no cylinder thermostat":
-            self.recommend_cylinder_thermostat(phase=phase)
+        has_cylinder_recommendation = [r for r in self.recommendations if r["type"] == "cylinder_thermostat"]
+
+        if ((self.property.hotwater["clean_description"] == "From main system, no cylinder thermostat") &
+            (len(has_cylinder_recommendation) == 0)):
+            self.recommend_cylinder_thermostat(phase=recommendations_phase)
            return

-    def recommend_tank_insulation(self, phase):
+    def recommend_tank_insulation(self, phase, sap_points=None, survey=False, _return=False):
        """
        If the home has a very poor hot water system, this is often indicative of a lack of insulation on the hot water
        tank. This is a very simple and cost effective improvement that can be made to the home. It will likely
@ -55,27 +95,30 @@ class HotwaterRecommendations:
        else:
            description = "Insulate hot water tank"

-        self.recommendations.append(
-            {
-                "phase": phase,
-                "parts": [],
-                "type": "hot_water_tank_insulation",
-                "measure_type": "hot_water_tank_insulation",
-                "description": description,
-                "starting_u_value": None,
-                "new_u_value": None,
-                "sap_points": None,
-                "already_installed": already_installed,
-                **recommendation_cost,
-                "simulation_config": {"hot_water_energy_eff_ending": "Poor"},
-                "description_simulation": {
-                    "hot-water-energy-eff": "Poor"
-                }
-            }
-        )
+        to_append = {
+            "phase": phase,
+            "parts": [],
+            "type": "hot_water_tank_insulation",
+            "measure_type": "hot_water_tank_insulation",
+            "description": description,
+            "starting_u_value": None,
+            "new_u_value": None,
+            "sap_points": sap_points,
+            "already_installed": already_installed,
+            **recommendation_cost,
+            "simulation_config": {"hot_water_energy_eff_ending": "Poor"},
+            "description_simulation": {
+                "hot-water-energy-eff": "Poor"
+            },
+            "survey": survey
+        }
+        if _return:
+            return to_append
+
+        self.recommendations.append(to_append)
        return

-    def recommend_cylinder_thermostat(self, phase):
+    def recommend_cylinder_thermostat(self, phase, sap_points=None, survey=False, _return=False):
        """
        If the home has a very poor hot water system, this is often indicative of a lack of insulation on the hot water
        tank. This is a very simple and cost effective improvement that can be made to the home.
@ -101,23 +144,86 @@ class HotwaterRecommendations:
            **hotwater_simulation_config
        }

-        self.recommendations.append(
-            {
-                "phase": phase,
-                "parts": [],
-                "type": "cylinder_thermostat",
-                "measure_type": "cylinder_thermostat",
-                "description": description,
-                "starting_u_value": None,
-                "new_u_value": None,
-                "sap_points": None,
-                "already_installed": already_installed,
-                **recommendation_cost,
-                "simulation_config": simulation_config,
-                "description_simulation": {
-                    "hot-water-energy-eff": self.property.data["hot-water-energy-eff"],
-                    "hotwater-description": new_epc_description,
-                }
-            }
-        )
+        to_append = {
+            "phase": phase,
+            "parts": [],
+            "type": "cylinder_thermostat",
+            "measure_type": "cylinder_thermostat",
+            "description": description,
+            "starting_u_value": None,
+            "new_u_value": None,
+            "sap_points": sap_points,
+            "already_installed": already_installed,
+            **recommendation_cost,
+            "simulation_config": simulation_config,
+            "description_simulation": {
+                "hot-water-energy-eff": self.property.data["hot-water-energy-eff"],
+                "hotwater-description": new_epc_description,
+            },
+            "survey": survey
+        }
+        if _return:
+            return to_append
+
+        self.recommendations.append(to_append)
+        return
+
+    def recommend_cylinder_thermostat_gas_boiler_circulator(self, phase):
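+        """
+        Recommend a cylinder thermostat together with hot water tank insulation as a single combined measure.
+        This is used when the hot water description is "Gas boiler/circulator, no cylinder thermostat". The costs
+        of the two measures are added together and the simulated hot water description becomes "From main system".
+        This is a very simple and cost effective improvement that can be made to the home.
+        """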
+
+        thermostat_recommendation_cost = self.costs.cylinder_thermostat()
+        cylinder_recommendation_cost = self.costs.hot_water_tank_insulation()
+        # Add them
+        total_cost = {
+            k: thermostat_recommendation_cost[k] + cylinder_recommendation_cost[k] for k in
+            thermostat_recommendation_cost.keys()
+        }
+
+        already_installed = "cylinder_thermostat" in self.property.already_installed
+        if already_installed:
+            total_cost = override_costs(total_cost)
+            description = "Cylinder thermostat & insulation has already been installed, no further action required"
+        else:
+            description = "Install a smart cylinder thermostat and insulate the hot water tank with 80mm insulation"
+
+        new_epc_description = "From main system"
+        hotwater_ending_config = HotWaterAttributes(new_epc_description).process()
+        hotwater_simulation_config = check_simulation_difference(
+            new_config=hotwater_ending_config, old_config=self.property.hotwater
+        )
+
+        if self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"]:
+            new_efficiency = "Good"
+        else:
+            new_efficiency = self.property.data["hot-water-energy-eff"]
+
+        simulation_config = {
+            "hot_water_energy_eff_ending": new_efficiency,
+            **hotwater_simulation_config
+        }
+
+        to_append = {
+            "phase": phase,
+            "parts": [],
+            "type": "cylinder_thermostat",
+            "measure_type": "cylinder_thermostat",
+            "description": description,
+            "starting_u_value": None,
+            "new_u_value": None,
+            "sap_points": None,
+            "already_installed": already_installed,
+            **total_cost,
+            "simulation_config": simulation_config,
+            "description_simulation": {
+                "hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
+                "hotwater-description": new_epc_description,
+            },
+            "survey": False
+        }
+
+        self.recommendations.append(to_append)
        return
--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@ -4,6 +4,7 @@ from backend.Property import Property
 from typing import List
 from recommendations.Costs import Costs
 from recommendations.recommendation_utils import override_costs
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings


 class LightingRecommendations:
@ -161,6 +162,7 @@ class LightingRecommendations:
                # the proportion of lights that will be set to low energy
                "sap_points": sap_points,
                "kwh_savings": heat_demand_change,
+                "energy_cost_savings": heat_demand_change * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
                "co2_equivalent_savings": carbon_change,
                "description_simulation": {
                    "lighting-energy-eff": "Very Good",
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@ -142,19 +142,17 @@ class Recommendations:

        # Ventilation recommendations
        # We only produce a ventilation recommendation if the property is recommended to have wall or roof
-        # insulation
-        # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this
-        # has no
-        # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
-        # have any
-        # wall or roof recommendations, we will ensure that ventilation is included in the simulation
+        # insulation. We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this
+        # has no real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
+        # have any wall or roof recommendations, we will ensure that ventilation is included in the simulation
        if (
            (self.wall_recomender.recommendations or self.roof_recommender.recommendations) and
            ("ventilation" in measures)
        ):
-            self.ventilation_recomender.recommend()
+            self.ventilation_recomender.recommend(phase=phase)
            if self.ventilation_recomender.recommendation:
                property_recommendations.append(self.ventilation_recomender.recommendation)
+                phase += 1

        if "trickle_vents" in measures:
             # This is a recommendation that typically comes from an energy assessment
@ -211,27 +209,25 @@ class Recommendations:
            measures=measures,
            has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
        )
-        if (
-            self.heating_recommender.heating_recommendations or
-            self.heating_recommender.heating_control_recommendations
-        ):
+        if self.heating_recommender.heating_recommendations:

            # We split into first and second phase recommendations
            first_phase_recommendations = [
                r for r in (
-                    self.heating_recommender.heating_recommendations +
-                    self.heating_recommender.heating_control_recommendations
+                    self.heating_recommender.heating_recommendations
                )
                if r["phase"] == phase
            ]
            second_phase_recommendations = [
                r for r in (
-                    self.heating_recommender.heating_recommendations +
-                    self.heating_recommender.heating_control_recommendations
+                    self.heating_recommender.heating_recommendations
                )
                if r["phase"] == phase + 1
            ]

+            if first_phase_recommendations and second_phase_recommendations:
+                raise Exception("Imeplement me")
+
            if first_phase_recommendations:
                property_recommendations.append(first_phase_recommendations)

@ -243,8 +239,7 @@ class Recommendations:
             # otherwise we increment by 1
            max_used_phase = max(
                [rec["phase"] for rec in
-                 self.heating_recommender.heating_recommendations +
-                 self.heating_recommender.heating_control_recommendations]
+                 self.heating_recommender.heating_recommendations]
            )
            amount_to_increment = max_used_phase - phase + 1
            phase += amount_to_increment
@ -253,8 +248,13 @@ class Recommendations:
        if "hot_water" in measures:
            self.hotwater_recommender.recommend(phase=phase)
            if self.hotwater_recommender.recommendations:
-                property_recommendations.append(self.hotwater_recommender.recommendations)
-                phase += 1
+                if len(self.hotwater_recommender.recommendations) > 1:
+                    for r in self.hotwater_recommender.recommendations:
+                        property_recommendations.append([r])
+                        phase += 1
+                else:
+                    property_recommendations.append(self.hotwater_recommender.recommendations)
+                    phase += 1

        if "secondary_heating" in measures:
            self.secondary_heating_recommender.recommend(phase=phase)
@ -304,12 +304,12 @@ class Recommendations:
            # want to include the cavity wall insulation recommendation in the defaults

            if recommendations_by_type[0].get("type") in [
-                "mechanical_ventilation", "trickle_vents", "draught_proofing"
+                "trickle_vents", "draught_proofing"
            ]:
                continue

            has_u_value = recommendations_by_type[0].get("new_u_value") is not None
-            has_sap_points = recommendations_by_type[0].get("sap_points") is not None
+            has_sap_points = all([r.get("sap_points") is not None for r in recommendations_by_type])
            has_rank = recommendations_by_type[0].get("rank") is not None

             # We check if these recommendations have two different types, such as solid wall insulation
@ -447,6 +447,7 @@ class Recommendations:
        property_instance,
        all_predictions,
        recommendations,
+        representative_recommendations,
    ):

        """
@ -460,6 +461,7 @@ class Recommendations:
        :param property_instance: Instance of the Property class, for the home associated to property_id
        :param all_predictions: dictionary of predictions from the model apis
        :param recommendations: dictionary of recommendations for the property
+        :param representative_recommendations: dictionary of representative recommendations for the property
        :return:
        """

@ -471,15 +473,20 @@ class Recommendations:

        property_recommendations = recommendations[property_instance.id].copy()

+        representative_recs = representative_recommendations[property_instance.id].copy()
+        representative_ids = [r["recommendation_id"] for r in representative_recs]
+
        increasing_variables = ["sap"]
        decreasing_variables = ["carbon", "heat_demand"]

+        # If the recommendation is mechanical ventilation, we don't apply the rule that the new value should be higher
+        mv_increasing_variables = ["carbon", "heat_demand"]
+        mv_decreasing_variables = ["sap"]
+
        impact_summary = []
        for recommendations_by_type in property_recommendations:
            for rec in recommendations_by_type:
-                if rec["type"] in [
-                    "mechanical_ventilation", "trickle_vents", "draught_proofing", "extension_cavity_wall_insulation"
-                ]:
+                if rec["type"] in ["trickle_vents", "draught_proofing", "extension_cavity_wall_insulation"]:
                     # We don't have a perceived SAP impact of trickle vents, and we don't
                     # have the capacity to score draught proofing
                    if rec["type"] == "extension_cavity_wall_insulation":
@ -497,7 +504,9 @@ class Recommendations:
                        impact_summary.append(
                            {
                                "phase": rec["phase"],
+                                "representative": rec["recommendation_id"] in representative_ids,
                                "recommendation_id": rec["recommendation_id"],
+                                "measure_type": rec["measure_type"],
                                "sap": sap + rec["sap_points"],
                                "carbon": carbon - rec["co2_equivalent_savings"],
                                "heat_demand": heat_demand - rec["heat_demand"],
@ -519,15 +528,21 @@ class Recommendations:
                    # heating_cost_starting and heating_cost_ending are just the values in the EPC. However, with
                    # heating_cost_ending, we expect that the EPC will predict a heating cost based on what would happen
                    # if we implemented the recommendation today, so our starting value is the EPC
+
                    previous_phase_values = {
                        "sap": float(property_instance.data["current-energy-efficiency"]),
+                        # For carbon, even though we generally use the updated figure which includes the carbon
+                        # associated to appliances, for this scoring process we use the EPC carbon value. This means
+                        # that we don't overestimate the impact since the model uses the EPC carbon value
                        "carbon": float(property_instance.data["co2-emissions-current"]),
                        "heat_demand": float(property_instance.data["energy-consumption-current"]),
                    }

                else:

-                    previous_phase_values_multiple = [x for x in impact_summary if x["phase"] == (rec["phase"] - 1)]
+                    previous_phase_values_multiple = [
+                        x for x in impact_summary if x["phase"] == (rec["phase"] - 1) and x["representative"]
+                    ]
                    if len(previous_phase_values_multiple) != 1:
                        # Take an average of each of the previous phases
                        keys_to_median = ["sap", "carbon", "heat_demand"]
@ -541,8 +556,13 @@ class Recommendations:
                        previous_phase_values = previous_phase_values_multiple[0]

                # We extract the values for the current phase
+                if rec.get("survey", False):
+                    current_phase_sap = rec["sap_points"] + previous_phase_values["sap"]
+                else:
+                    current_phase_sap = phase_energy_efficiency_metrics["sap_change"]
+
                current_phase_values = {
-                    "sap": phase_energy_efficiency_metrics["sap_change"],
+                    "sap": current_phase_sap,
                    "carbon": phase_energy_efficiency_metrics["carbon_change"],
                    "heat_demand": phase_energy_efficiency_metrics["heat_demand"],
                }
@ -552,13 +572,23 @@ class Recommendations:
                # For decreasing variables, the new value should be lower than the previous, otherwise we set it to
                # the previous
                # In either case, we adjudge the recommendation to have had no/negligible impact
-                for v in increasing_variables:
+                # However, if the recommendation is mechanical ventilation, this can have a negative SAP impact so
+                # we don't apply this rule
+
+                if rec["type"] == "mechanical_ventilation":
+                    phase_increasing_variables = mv_increasing_variables
+                    phase_decreasing_variables = mv_decreasing_variables
+                else:
+                    phase_increasing_variables = increasing_variables
+                    phase_decreasing_variables = decreasing_variables
+
+                for v in phase_increasing_variables:
                    current_phase_values[v] = (
                        current_phase_values[v] if current_phase_values[v] > previous_phase_values[v] else
                        previous_phase_values[v]
                    )
                for v in previous_phase_values:
-                    if v in decreasing_variables:
+                    if v in phase_decreasing_variables:
                        current_phase_values[v] = (
                            current_phase_values[v] if current_phase_values[v] < previous_phase_values[v] else
                            previous_phase_values[v]
@ -573,13 +603,19 @@ class Recommendations:
                    "heat_demand": previous_phase_values["heat_demand"] - current_phase_values["heat_demand"],
                }

-                # Prevent from being negative
+                # Prevent from being negative - apart from ventilation
                for metric in ["sap", "carbon", "heat_demand"]:
-                    property_phase_impact[metric] = (
-                        0 if property_phase_impact[metric] < 0 else property_phase_impact[metric]
-                    )
-                    if metric == "sap":
-                        property_phase_impact[metric] = round(property_phase_impact[metric], 2)
+                    if rec["type"] != "mechanical_ventilation":
+                        property_phase_impact[metric] = (
+                            0 if property_phase_impact[metric] < 0 else property_phase_impact[metric]
+                        )
+                        if metric == "sap":
+                            property_phase_impact[metric] = round(property_phase_impact[metric], 2)
+                    else:
+                        # We prevent these from being positive
+                        property_phase_impact[metric] = (
+                            0 if property_phase_impact[metric] > 0 else property_phase_impact[metric]
+                        )

                # For the moment, we cap the number of SAP points that can be achieved by LEDs at 2
                if rec["type"] == "low_energy_lighting":
@ -599,11 +635,18 @@ class Recommendations:
                    # By limiting here, we don't change the value in current_phase_values. This means that the
                    # future recommendations won't have an impact that is too large
                    li_sap_limit = RoofRecommendations.get_loft_insulation_sap_limit(
-                        property_instance.data["roof-energy-eff"], property_instance.data["extension-count"]
+                        property_instance.data["roof-energy-eff"], property_instance.roof["insulation_thickness"]
                    )
                    if li_sap_limit is not None:
                        property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit)

+                if rec["type"] == "solar_pv":
+                    # We use the SAP points in the recommendation as a minimum
+                    property_phase_impact["sap"] = (
+                        rec["sap_points"] if property_phase_impact["sap"] < rec["sap_points"] else
+                        property_phase_impact["sap"]
+                    )
+
                # Insert this information into the recommendation.
                if not rec.get("survey", False):
                    rec["sap_points"] = property_phase_impact["sap"]
@ -620,7 +663,9 @@ class Recommendations:
                impact_summary.append(
                    {
                        "phase": rec["phase"],
+                        "representative": rec["recommendation_id"] in representative_ids,
                        "recommendation_id": rec["recommendation_id"],
+                        "measure_type": rec["measure_type"],
                        **current_phase_values
                    }
                )
@ -628,7 +673,9 @@ class Recommendations:
        return property_recommendations, impact_summary

    @staticmethod
-    def map_descriptions_to_fuel(heating_description, hotwater_description, main_fuel_description):
+    def map_descriptions_to_fuel(
+        heating_description, hotwater_description, main_fuel_description, descriptions_to_fuel_types
+    ):

        # Handle the case of community schemes
        if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"):
@ -641,7 +688,7 @@ class Recommendations:
                }
            raise NotImplementedError("Handle this case")

-        mapped = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
+        mapped = descriptions_to_fuel_types[heating_description]
        heating_fuel = mapped["fuel"]

        if hotwater_description in [
@ -661,7 +708,7 @@ class Recommendations:
                "heating_cop": mapped["cop"], "hotwater_cop": 1
            }

-        mapped_hotwater = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[hotwater_description]
+        mapped_hotwater = descriptions_to_fuel_types[hotwater_description]

        return {
            "heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"],
@ -670,17 +717,24 @@ class Recommendations:

    @classmethod
    def calculate_recommendation_tenant_savings(
-        cls, property_instance, kwh_simulation_predictions, property_recommendations
+        cls, property_instance, kwh_simulation_predictions, property_recommendations, ashp_cop=None
    ):
        """
        This method inserts the kwh savings and the bill savings that the customer will make from the recommendations
        based on the predictions from the ML model
+
+        It also ensures we base our solar savings and solar carbon savings from the calculations based on
+        the solar API and size of the array, instead of ML model
+
        :param property_instance: Instance of the Property class, for the home associated to property_id
        :param kwh_simulation_predictions: dictionary of predictions from the model apis
        :param property_recommendations: dictionary of recommendations for the property
+        :param ashp_cop: The coefficient of performance for the air source heat pump.
        :return:
        """

+        ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY
+
        kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][
            kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id)
            ].merge(
@ -739,22 +793,42 @@ class Recommendations:
            ]
        ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)

+        # We need the recommendation type
+        rec_id_to_type = {
+            rec["recommendation_id"]: rec["type"] for recs in property_recommendations for rec in recs
+        }
+        rec_id_to_type[STARTING_DUMMY_ID_VALUE] = "starting_dummy"
+
        for i in range(0, len(kwh_impact_table)):
-            current_phase = kwh_impact_table.loc[i, 'phase']
+            current = kwh_impact_table.loc[i]
+            current_phase = current['phase']
            previous_phase_id = (current_phase - 1) if (current_phase > 0) else -9999
            previous_phase = kwh_impact_table[kwh_impact_table['phase'] == previous_phase_id]

            if not previous_phase.empty:
                for col in ["predictions_heating", "predictions_hotwater"]:
+                    # Check if the recommendation type is ventilation
+                    if rec_id_to_type[current["recommendation_id"]] == "mechanical_ventilation":
+                        # We expect the kwh to increase
+                        if kwh_impact_table.loc[i, col] > previous_phase[col].max():
+                            continue
+
                    if kwh_impact_table.loc[i, col] > previous_phase[col].max():
                        kwh_impact_table.loc[i, col] = previous_phase[col].max()

+        descriptions_to_fuel_types = assumptions.DESCRIPTIONS_TO_FUEL_TYPES
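+        # We override the air source heat pump efficiencies with the supplied COP
+        # NOTE(review): descriptions_to_fuel_types is the assumptions.DESCRIPTIONS_TO_FUEL_TYPES object itself, so the
+        # assignment below changes that shared mapping in place - consider working on a copy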
+        ashp_keys = [k for k in descriptions_to_fuel_types.keys() if "air source heat pump" in k.lower()]
+        for k in ashp_keys:
+            descriptions_to_fuel_types[k]["cop"] = ashp_cop
+
        # For heating system recommendations, this could result in a fuel type change so we reflect that
        fuel_mapping = pd.DataFrame([
            {
                "id": epc["id"],
                **cls.map_descriptions_to_fuel(
-                    epc["mainheat-description"], epc["hotwater-description"], epc["main-fuel"]
+                    epc["mainheat-description"], epc["hotwater-description"], epc["main-fuel"],
+                    descriptions_to_fuel_types
                )
            } for epc in property_instance.updated_simulation_epcs
        ])
@ -768,7 +842,8 @@ class Recommendations:
                            **cls.map_descriptions_to_fuel(
                                property_instance.data["mainheat-description"],
                                property_instance.data["hotwater-description"],
-                                property_instance.data["main-fuel"]
+                                property_instance.data["main-fuel"],
+                                descriptions_to_fuel_types
                            )
                        }
                    ]
@ -797,7 +872,7 @@ class Recommendations:
        for recs in property_recommendations:
            for rec in recs:
                if rec["type"] in [
-                    "mechanical_ventilation", "trickle_vents", "draught_proofing", "extension_cavity_wall_insulation"
+                    "trickle_vents", "draught_proofing", "extension_cavity_wall_insulation"
                ]:
                    # We cannot score the impact on draught proofing
                    continue
@ -808,6 +883,12 @@ class Recommendations:

                if rec["type"] == "solar_pv":
                    rec["kwh_savings"] = rec_impact["solar_kwh_savings"].values[0]
+
+                    # Calculate carbon savings from this - emissions in kg and convert to tonnes
+                    emissions_kg = rec["kwh_savings"] * assumptions.ELECTRICITY_CARBON_INTENSITY
+                    emissions_tonnes = emissions_kg / 1000
+
+                    rec["co2_equivalent_savings"] = emissions_tonnes
                    rec["energy_cost_savings"] = (
                        rec_impact["solar_kwh_savings"].values[0] * AnnualBillSavings.ELECTRICITY_PRICE_CAP
                    )
@ -816,13 +897,18 @@ class Recommendations:
                heating_kwh_savings = (
                    previous_phase_impact["predictions_heating"].mean() - rec_impact["predictions_heating"].values[0]
                )
-                heating_cost_savings = (
-                    previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0]
-                )
-
                hotwater_kwh_savings = (
                    previous_phase_impact["predictions_hotwater"].mean() - rec_impact["predictions_hotwater"].values[0]
                )
+
+                # Shouldn't be positive
+                if rec["type"] == "mechanical_ventilation":
+                    heating_kwh_savings = 0 if heating_kwh_savings > 0 else heating_kwh_savings
+                    hotwater_kwh_savings = 0 if hotwater_kwh_savings > 0 else hotwater_kwh_savings
+
+                heating_cost_savings = (
+                    previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0]
+                )
                hotwater_host = (
                    previous_phase_impact["hotwater_cost"].mean() - rec_impact["hotwater_cost"].values[0]
                )
@ -830,9 +916,8 @@ class Recommendations:
                total_kwh_savings = heating_kwh_savings + hotwater_kwh_savings
                energy_cost_savings = heating_cost_savings + hotwater_host

-                if rec["type"] == "lighting":
-                    # In this case, we should probably just SKIP but check when we have one!
-                    raise Exception("Implement me 3")
+                if rec["type"] == "low_energy_lighting":
+                    continue

                rec["kwh_savings"] = total_kwh_savings
                rec["energy_cost_savings"] = energy_cost_savings
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@ -52,6 +52,10 @@ class RoofRecommendations:
            part for part in materials if part["type"] == "flat_roof_insulation"
        ]

+        self.room_roof_insulation_materials = [
+            part for part in materials if part["type"] == "room_roof_insulation"
+        ]
+
        # Extract the insulation thickness from the roof, which is used throughout this method
        self.insulation_thickness = convert_thickness_to_numeric(
            self.property.roof["insulation_thickness"],
@ -60,16 +64,16 @@ class RoofRecommendations:
        )

    @classmethod
-    def get_loft_insulation_sap_limit(cls, roof_energy_eff, extension_count):
+    def get_loft_insulation_sap_limit(cls, roof_energy_eff, existing_thickness):
        """
        Get the SAP limit for loft insulation
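         :param roof_energy_eff:
         :param existing_thickness: existing insulation thickness; an all-digit value >= 250 gives a limit of 0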
        :return:
        """

-        if extension_count == 0:
-            # No limit
-            return None
+        if str(existing_thickness).isdigit():
+            if float(existing_thickness) >= 250:
+                return 0

        if roof_energy_eff in ["Good", "Very Good"]:
            return 1
@ -123,7 +127,11 @@ class RoofRecommendations:
            self.property.roof["insulation_thickness"] in ["average", "above_average"]
        )

-        return full_insulated_room_roof or room_roof_insulated_at_rafters
+        has_non_invasive_recommendation = any(
+            x["type"] == "room_roof_insulation" for x in self.property.non_invasive_recommendations
+        )
+
+        return (full_insulated_room_roof or room_roof_insulated_at_rafters) and not has_non_invasive_recommendation

    def recommend(self, phase, measures=None, default_u_values=False):

@ -134,6 +142,10 @@ class RoofRecommendations:

        u_value = self.property.roof["thermal_transmittance"]

+        # If we have a flat roof but we don't have flat roof as a measure, we exit
+        if self.property.roof["is_flat"] and "flat_roof_insulation" not in measures:
+            return
+
        # We check if the roof is already insulated and if so, we exit

        # Building regulations part L recommend installing at least 270mm of insulation, however generally we
@ -148,6 +160,9 @@ class RoofRecommendations:
        if self.is_room_roof_insulated_or_unsuitable(measures):
            return

+        if self.property.roof["is_thatched"]:
+            return
+
        # If we have a u-value already, need to implement this
        if u_value:
            if u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
@ -181,7 +196,8 @@ class RoofRecommendations:

        # We firstly handle non-intrusive recommendations, which may override the normal roof insulation recommendations
        if ("loft_insulation" in [x["type"] for x in non_invasive_recommendations]) or (
-            self.property.roof["is_pitched"] and "loft_insulation" in measures
+            self.property.roof["is_pitched"] and "loft_insulation" in measures and
+            not self.property.roof["is_at_rafters"]
        ):
            self.recommend_roof_insulation(
                u_value=u_value,
@ -282,6 +298,11 @@ class RoofRecommendations:

        insulation_materials = pd.DataFrame(insulation_materials)

+        non_invasive_recommendations = next(
+            (r for r in self.property.non_invasive_recommendations if
+             r["type"] == insulation_materials["type"].values[0]), {}
+        )
+
        lowest_selected_u_value = None
        recommendations = []
        for _, insulation_material_group in insulation_materials.groupby("description"):
@ -421,14 +442,15 @@ class RoofRecommendations:
                            "description": self.make_roof_insulation_description(material),
                            "starting_u_value": u_value,
                            "new_u_value": new_u_value,
-                            "sap_points": None,
+                            "sap_points": non_invasive_recommendations.get("sap_points", 0),
                            "already_installed": already_installed,
                            "simulation_config": simulation_config,
                            "description_simulation": {
                                "roof-description": new_description,
                                "roof-energy-eff": new_efficiency
                            },
-                            **cost_result
+                            **cost_result,
+                            "survey": non_invasive_recommendations.get("survey", False)
                        }
                    )

@ -478,28 +500,22 @@ class RoofRecommendations:
        :return:
        """

-        # TODO: We temporarilty use costs from SCIS for RIR insulation. The costing was £180/m2 floor
-        roof_roof_insulation_materials = [
-            {
-                "type": "room_roof_insulation",
-                "description": "Insulating the ceiling of the roof roof and re-decorate",
-                "depths": [100],
-                "depth_unit": "mm",
-                "r_value_per_mm": 0.038,
-                "thermal_conductivity": 0.022,
-                "cost": [180],
-            }
-        ]
+        # We have a list of materials that can be used for room roof insulation
+        # We will iterate over these materials and recommend them based on the current u-value of the roof
+        # and the cost of the materials

        rir_non_invasive_recommendation = next(
            (x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {}
        )

+        insulation_materials = pd.DataFrame(self.room_roof_insulation_materials)
+
        # lowest_selected_u_value = None
        recommendations = []
-        for material in roof_roof_insulation_materials:
-            for depth, cost_per_unit in zip(material["depths"], material["cost"]):
-                part_u_value = r_value_per_mm_to_u_value(depth, material["r_value_per_mm"])
+        for _, material_group in insulation_materials.groupby("description"):
+            for material in material_group.itertuples():
+
+                part_u_value = r_value_per_mm_to_u_value(material.depth, material.r_value_per_mm)

                _, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
                new_u_value = math.ceil(new_u_value * 100.0) / 100.0
@ -507,13 +523,11 @@ class RoofRecommendations:
                # We allow a small tolerance for error so we don't discount the recommendation entirely

                estimated_cost = (
-                    cost_per_unit * self.property.insulation_floor_area if
+                    material.total_cost * self.property.insulation_floor_area if
                    rir_non_invasive_recommendation.get("cost") is None else
                    rir_non_invasive_recommendation.get("cost")
                )

-                sap_points = rir_non_invasive_recommendation.get("sap_points", None)
-
                # Could also be Roof room(s), ceiling insulated
                new_descriptin = "Roof room(s), insulated"
                roof_ending_config = RoofAttributes(new_descriptin).process()
@ -562,7 +576,7 @@ class RoofRecommendations:
                        "description": "Insulate room in roof at rafters and re-decorate",
                        "starting_u_value": u_value,
                        "new_u_value": new_u_value,
-                        "sap_points": sap_points,
+                        "sap_points": rir_non_invasive_recommendation.get("sap_points", None),
                        "simulation_config": simulation_config,
                        "description_simulation": {
                            "roof-description": new_descriptin,
--- a/recommendations/SecondaryHeating.py
+++ b/recommendations/SecondaryHeating.py
@ -9,12 +9,6 @@ class SecondaryHeating:
    system.
    """

-    # The list of existing heating systems that are accepted
-    ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas"]
-    ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric"]
-    # These are the heaters where works are required to remove them
-    FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"]
-
    def __init__(self, property_instance: Property):
        self.property = property_instance
        self.costs = Costs(self.property)
@ -25,18 +19,10 @@ class SecondaryHeating:
        # Reset
        self.recommendation = []

-        if self.property.main_heating["clean_description"] not in self.ACCEPTED_MAINHEAT_DESCRIPTIONS:
-            return
-
-        # TODO: We need to clean secondary data
-        if self.property.data['secondheat-description'] not in self.ACCEPTED_SECONDHEAT_DESCRIPTIONS:
-            return
-
-        if self.property.data['secondheat-description'] in self.FIXED_HEATER_DESCRIPTIONS:
-            # We have an associated cost otherwise, there is no cost
-            n_rooms = self.property.data['number-heated-rooms']
+        if self.property.data['number-habitable-rooms'] > self.property.data['number-heated-rooms']:
+            n_rooms = self.property.data['number-habitable-rooms'] - self.property.data['number-heated-rooms']
        else:
-            n_rooms = 0
+            n_rooms = self.property.data["number-heated-rooms"]

        costs = self.costs.heater_removal(n_rooms=n_rooms)

--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@ -1,24 +1,39 @@
 import numpy as np
 import pandas as pd
+import backend.app.assumptions as assumptions

 from recommendations.Costs import Costs
 from recommendations.recommendation_utils import override_costs, estimate_pitched_roof_area


 class SolarPvRecommendations:
-    # Solar panel specs based on Eurener 400s solar panels
-    # https://midsummerwholesale.co.uk/buy/eurener/eurener-400w-mepv-zebra-ab-half-cut-mono
-    # Approximate area of the solar panels
-    SOLAR_PANEL_AREA = 1.79
-    # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
-    # This was previously set to 250w, but has been upped to 400 based on the systems used by Cotswolrd Energy Group
-    SOLAR_PANEL_WATTAGE = 400
-
+    # For domestic properties, we don't recommend a solar PV system with wattage outside of these
+    # bounds
    MAX_SYSTEM_WATTAGE = 6000
    MIN_SYSTEM_WATTAGE = 1000

+    # The maximum fraction of the roof area we allow to be covered in solar panels for our recommendations.
    MAX_ROOF_AREA_PERCENTAGE = 0.7

+    SAP_POINTS_PER_5_PERCENT_ROOF_COVERAGE = 1
+
+    # Two fixed panel configurations (4 and 8 panels, i.e. 400 W per panel). The yearly AC output is
+    # assumptions.MEDIAN_WATTAGE_TO_AC times the array wattage and the panelled roof area is the panel
+    # count times assumptions.RDSAP_AREA_PER_PANEL.
+    # NOTE(review): presumably used as a fallback when no modelled panel performance is available -
+    # confirm against callers.
+    # Every row must use exactly the same keys: a misspelt key would create an extra column and leave
+    # NaN in "array_wattage" for that row.
+    BACKUP_PANEL_PERFORMANCE = pd.DataFrame(
+        [
+            {
+                "n_panels": 4,
+                "array_wattage": 1600,
+                "initial_ac_kwh_per_year": assumptions.MEDIAN_WATTAGE_TO_AC * 1600,
+                "panneled_roof_area": 4 * assumptions.RDSAP_AREA_PER_PANEL
+            },
+            {
+                "n_panels": 8,
+                "array_wattage": 3200,
+                "initial_ac_kwh_per_year": assumptions.MEDIAN_WATTAGE_TO_AC * 3200,
+                "panneled_roof_area": 8 * assumptions.RDSAP_AREA_PER_PANEL
+            },
+        ]
+    )
+
    def __init__(self, property_instance):
        """
        :param property_instance: Instance of the Property class, for the home associated to property_id
@ -42,46 +57,6 @@ class SolarPvRecommendations:

        return trimmed_list

-    def mds_recommend(self, phase=None, solar_pv_percentage=0.5):
-        # For specific usage within the mds report
-
-        solar_pv_roof_area = self.property.get_solar_pv_roof_area(solar_pv_percentage)
-
-        number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
-        solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
-
-        solar_panel_wattage = np.clip(
-            a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
-        )
-
-        # We now have a property which is potentially suitable for solar PV
-        roof_coverage_percent = round(solar_pv_percentage * 100)
-        # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
-        # of solar PV installations
-        cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=False)
-        kw = np.floor(solar_panel_wattage / 100) / 10
-
-        description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
-                       f"anel system on {round(roof_coverage_percent)}% the roof.")
-
-        return [
-            {
-                "phase": phase,
-                "parts": [],
-                "type": "solar_pv",
-                "description": description,
-                "starting_u_value": None,
-                "new_u_value": None,
-                "sap_points": None,
-                "already_installed": False,
-                **cost_result,
-                # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
-                # back up here
-                "photo_supply": roof_coverage_percent,
-                "has_battery": False
-            }
-        ]
-
    def recommend_building_analysis(self, phase):
        """
        This recommendation approach handles the case of producing solar PV recommendations at the building level,
@ -103,13 +78,22 @@ class SolarPvRecommendations:
        for rank, recommendation_config in best_configurations.iterrows():
            # If we dont have the panneled_roof_area in the recommendation_config we calculate it
            if recommendation_config.get("panneled_roof_area", None):
-                roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
+                # We spread the coverage across the individual units
+                roof_coverage_percent = round(
+                    ((recommendation_config["panneled_roof_area"] / total_roof_area) * 100) / n_units
+                )
            else:
                raise Exception("IMPLEMENT ME")
+
+            n_floors = (
+                self.property.number_of_storeys["number_of_storeys"] if
+                self.property.number_of_storeys["number_of_storeys"] is not None else 3
+            )
+
            total_cost = self.costs.solar_pv(
                array_cost=recommendation_config.get("cost", None),
                n_panels=recommendation_config["n_panels"],
-                n_floors=self.property.number_of_storeys["number_of_storeys"],
+                n_floors=n_floors,
                needs_inverter=True,
            )["total"] / n_units

@ -203,6 +187,20 @@ class SolarPvRecommendations:
            roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100)
            # We round up to the nearest 5
            roof_coverage_percent = np.ceil(roof_coverage_percent / 5) * 5
+
+            # Typically, we've observed that every 5% of additional roof coverage will result in at least
+            # an additional 1 SAP point (though often 2 points). Given this, we can add a reasonable minimum
+            # for the number of SAP points we might expect. We've observed that for some cases where properties
+            # are hitting the higher SAP scores (e.g. EPC A and above), the model can sometimes under-predict
+            # the number of SAP points. This appears to be due to a relatively small number of properties
+            # actually achieving the upper echelons of EPC rating. This can be the case if we're simulating a
+            # whole house retrofit where the home is getting complete insulation, a heat pump and solar panels.
+            # Because panels are the final recommendation, they are often the measure that takes the home
+            # into the medium to high EPC A ranges and so because of a lack of training data, this means that
+            # we might sometimes under-predict. This minimum is intended to try and reduce the negative impact
+            # of this. This minimum is used in Recommendations.calculate_recommendation_impact
+            minimum_sap_points = (roof_coverage_percent / 5) * self.SAP_POINTS_PER_5_PERCENT_ROOF_COVERAGE
+
            for has_battery in [False, True]:
                cost_result = self.costs.solar_pv(
                    has_battery=has_battery,
@ -212,11 +210,14 @@ class SolarPvRecommendations:
                )
                kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
                if has_battery:
-                    description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on "
-                                   f"{round(roof_coverage_percent)}% the roof, with a battery storage system.")
+                    description = (
+                        f"Install a {kw} kilowatt-peak (kWp) solar panel system, with a battery."
+                    )
                else:
-                    description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
-                                   f"anel system on {round(roof_coverage_percent)}% the roof.")
+                    description = f"Install a {kw} kilowatt-peak (kWp) solar panel system."
+
+                if self.property.in_conservation_area:
+                    description += " Property is in a consevation area - please check with local planning authority."

                already_installed = "solar_pv" in self.property.already_installed
                if already_installed:
@ -231,7 +232,7 @@ class SolarPvRecommendations:
                        "description": description,
                        "starting_u_value": None,
                        "new_u_value": None,
-                        "sap_points": None,
+                        "sap_points": minimum_sap_points,
                        "already_installed": already_installed,
                        **cost_result,
                        # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@ -29,7 +29,7 @@ class VentilationRecommendations(Definitions):
    def identify_ventilation(self):
        self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS

-    def recommend(self):
+    def recommend(self, phase):
        """
        If there is no ventilation, we recommend installing ventilation

@ -63,7 +63,7 @@ class VentilationRecommendations(Definitions):
        # We recommend installing two mechanical ventilation systems
        self.recommendation = [
            {
-                "phase": None,
+                "phase": phase,
                "parts": part,
                "type": part[0]["type"],
                "measure_type": "mechanical_ventilation",
@ -79,7 +79,13 @@ class VentilationRecommendations(Definitions):
                "total": estimated_cost,
                # We use a very simple and rough estimate of 4 hours per unit
                "labour_hours": labour_hours,
-                "labour_days": labour_days  # Assume 8 hour day
+                "labour_days": labour_days,  # Assume 8 hour day
+                "simulation_config": {
+                    "mechanical_ventilation_ending": "mechanical, extract only",
+                },
+                "description_simulation": {
+                    "mechanical-ventilation": "mechanical, extract only"
+                }
            }
        ]

--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@ -385,6 +385,11 @@ class WallRecommendations(Definitions):
        if insulation_thickness == "below average":
            cavity_width = cavity_width * (1 - PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION)

+        non_invasive_recommendations = next(
+            (r for r in self.property.non_invasive_recommendations if
+             r["type"] == insulation_materials["type"].values[0]), {}
+        )
+
        # Test the different fill options
        lowest_selected_u_value = None
        recommendations = []
@ -475,14 +480,15 @@ class WallRecommendations(Definitions):
                        "description": description,
                        "starting_u_value": u_value,
                        "new_u_value": new_u_value,
-                        "sap_points": None,
+                        "sap_points": non_invasive_recommendations.get("sap_points", None),
                        "already_installed": already_installed,
                        "simulation_config": simulation_config,
                        "description_simulation": {
                            "walls-description": "Cavity wall, filled cavity",
                            "walls-energy-eff": "Good"
                        },
-                        **cost_result
+                        **cost_result,
+                        "survey": non_invasive_recommendations.get("survey", False)
                    }
                )

@ -540,15 +546,10 @@ class WallRecommendations(Definitions):

        lowest_selected_u_value = None
        recommendations = []
-
-        iwi_non_invasive_recommendations = next(
-            (r for r in self.property.non_invasive_recommendations if r["type"] == "internal_wall_insulation"), {}
+        non_invasive_recommendations = next(
+            (r for r in self.property.non_invasive_recommendations if
+             r["type"] == insulation_materials["type"].values[0]), {}
        )
-        ewi_non_invasive_recommendations = next(
-            (r for r in self.property.non_invasive_recommendations if r["type"] == "external_wall_insulation"), {}
-        )
-        if ewi_non_invasive_recommendations:
-            raise NotImplementedError("Implement ewi non-invasive recommendations")

        for _, insulation_material_group in insulation_materials.groupby("description"):

@ -590,31 +591,25 @@ class WallRecommendations(Definitions):
                    if already_installed:
                        cost_result = override_costs(cost_result)

+                    if non_invasive_recommendations.get("cost") is not None:
+                        raise NotImplementedError(
+                            "Not handled passing costs from non-invasive recommendations for iwi"
+                        )
+
                    if material["type"] == "internal_wall_insulation":
-
-                        if iwi_non_invasive_recommendations.get("cost") is not None:
-                            raise NotImplementedError(
-                                "Not handled passing costs from non-invasive recommendations for iwi"
-                            )
-
-                        sap_points = iwi_non_invasive_recommendations.get("sap_points", None)
-                        survey = iwi_non_invasive_recommendations.get("survey", False)
-
                        new_description = self.get_internal_external_wall_description(
                            self.INTERNALLY_INSULATED_WALL_DESCRIPTIONS, new_u_value
                        )
-
                    elif material["type"] == "external_wall_insulation":
-
-                        sap_points = ewi_non_invasive_recommendations.get("sap_points", None)
-                        survey = ewi_non_invasive_recommendations.get("survey", False)
-
                        new_description = self.get_internal_external_wall_description(
                            self.EXTERNALLY_INSULATED_WALL_DESCRIPTIONS, new_u_value
                        )
                    else:
                        raise ValueError("Invalid material type")

+                    sap_points = non_invasive_recommendations.get("sap_points", None)
+                    survey = non_invasive_recommendations.get("survey", False)
+
                    wall_ending_config = WallAttributes(new_description).process()

                    walls_simulation_config = check_simulation_difference(
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@ -215,21 +215,29 @@ class WindowsRecommendations:
            "glazed-type": glazed_type_ending,
        }

+        measure_type = "double_glazing" if not is_secondary_glazing else "secondary_glazing"
+
+        non_invasive_recommendation = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] in ["windows_glazing", measure_type]),
+            {}
+        )
+
        self.recommendation = [
            {
                "phase": phase,
                "parts": [],
                "type": "windows_glazing",
-                "measure_type": "double_glazing" if not is_secondary_glazing else "secondary_glazing",
+                "measure_type": measure_type,
                "description": description,
                "starting_u_value": None,
                "new_u_value": None,
-                "sap_points": None,
+                "sap_points": non_invasive_recommendation.get("sap_points", None),
                "already_installed": already_installed,
                **cost_result,
                "is_secondary_glazing": is_secondary_glazing,
                "description_simulation": description_simulation,
                "simulation_config": simulation_config,
+                "survey": non_invasive_recommendation.get("survey", None),
            }
        ]

--- a/recommendations/county_to_region.py
+++ b/recommendations/county_to_region.py
@ -111,8 +111,11 @@ county_to_region_map = {
    'Windsor and Maidenhead': 'South East England', 'Woking': 'South East England', 'Wokingham': 'South East England',
    'Worthing': 'South East England', 'Wycombe': 'South East England',
    'Bath and North East Somerset': 'South West England', 'Bournemouth': 'South West England',
-    'Bristol': 'South West England', 'Cheltenham': 'South West England', 'Christchurch': 'South West England',
-    'City of Bristol': 'South West England', 'Cornwall': 'South West England', 'Cotswold': 'South West England',
+    'Bristol': 'South West England',
+    'Cheltenham': 'South West England', 'Christchurch': 'South West England',
+    'City of Bristol': 'South West England',
+    'Bristol, City of': 'South West England',
+    'Cornwall': 'South West England', 'Cotswold': 'South West England',
    'Devon': 'South West England', 'Dorset': 'South West England', 'East Devon': 'South West England',
    'East Dorset': 'South West England', 'Exeter': 'South West England', 'Forest of Dean': 'South West England',
    'Gloucester': 'South West England', 'Gloucestershire': 'South West England',
@ -132,7 +135,10 @@ county_to_region_map = {
    'Merthyr Tydfil': 'Wales', 'Monmouthshire': 'Wales', 'Mountain Ash': 'Wales', 'Neath Port Talbot': 'Wales',
    'Newport': 'Wales', 'Pembrokeshire': 'Wales', 'Penarth': 'Wales', 'Pentre': 'Wales', 'Pontyclun': 'Wales',
    'Pontypridd': 'Wales', 'Porth': 'Wales', 'Porthcawl': 'Wales', 'Powys': 'Wales', 'Rhondda Cynon Taff': 'Wales',
-    'Rhoose': 'Wales', 'Sully': 'Wales', 'Swansea': 'Wales', 'The Vale of Glamorgan': 'Wales', 'Tonypandy': 'Wales',
+    'Rhoose': 'Wales', 'Sully': 'Wales', 'Swansea': 'Wales',
+    'The Vale of Glamorgan': 'Wales',
+    'Vale of Glamorgan': 'Wales',
+    'Tonypandy': 'Wales',
    'Torfaen': 'Wales', 'Treharris': 'Wales', 'Treorchy': 'Wales', 'Wrexham': 'Wales', 'Birmingham': 'West Midlands',
    'Bromsgrove': 'West Midlands', 'Cannock Chase': 'West Midlands', 'Coventry': 'West Midlands',
    'Dudley': 'West Midlands', 'East Staffordshire': 'West Midlands', 'Herefordshire': 'West Midlands',
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@ -1,10 +1,14 @@
-def prepare_input_measures(property_recommendations, goal):
+import backend.app.assumptions as assumptions
+
+
+def prepare_input_measures(property_recommendations, goal, needs_ventilation):
    """
    Basic function to convert recommendations_to_upload to a format that is
    suitable for the optimiser - large
    :param property_recommendations:   object containing the recommendations, created in the plan trigger api
    :param goal:    goal to be optimised for, should be one of the keys in gain_map. E.g. if the gain is SAP points,
                    the goal should reflect that desired gain
+    :param needs_ventilation:  boolean to indicate if the property needs ventilation
    :return:    Nested list of input measures
    """

@ -16,23 +20,58 @@ def prepare_input_measures(property_recommendations, goal):
    if not goal_key:
        raise NotImplementedError("Not implemented this gain type - investigate me")

+    # We only ever have one ventilation measure for now
+    ventilation_recommendation = next(
+        (measure[0] for measure in property_recommendations if measure[0]["type"] == "mechanical_ventilation"),
+        {}
+    )
+
    input_measures = []
    for recs in property_recommendations:

+        if needs_ventilation and recs[0]["type"] == "mechanical_ventilation":
+            # If the house needs ventilation, ventilation will be packaged with the fabric measure so
+            # we don't need to optimise it independently
+            continue
+
        if recs[0]["type"] == "solar_pv":
            # if the recommendation is a solar recommendation with a battery, we exclude it from the optimisation.
            recs = [r for r in recs if ~r["has_battery"]]

-        input_measures.append(
-            [
+        recs_to_append = [rec for rec in recs if rec["energy_cost_savings"] >= 0]
+        if not recs_to_append:
+            continue
+
+        to_append = []
+        for rec in recs:
+            # We bundle the impact of ventilation with the measure
+            total = (
+                rec["total"] + ventilation_recommendation["total"]
+                if rec["type"] in assumptions.measures_needing_ventilation
+                else rec["total"]
+            )
+            gain = (
+                rec[goal_key] + ventilation_recommendation[goal_key]
+                if rec["type"] in assumptions.measures_needing_ventilation
+                else rec[goal_key]
+            )
+
+            rec_type = (
+                "+".join(
+                    [rec["type"], ventilation_recommendation["type"]]
+                ) if rec["type"] in assumptions.measures_needing_ventilation
+                else rec["type"]
+            )
+
+            to_append.append(
                {
                    "id": rec["recommendation_id"],
-                    "cost": rec["total"],
-                    "gain": rec[goal_key],
-                    "type": rec["type"]
+                    "cost": total,
+                    "gain": gain,
+                    "type": rec_type
                }
-                for rec in recs
-            ]
-        )
+            )
+
+        input_measures.append(to_append)

    return input_measures
--- a/recommendations/rdsap_tables.py
+++ b/recommendations/rdsap_tables.py
@ -257,7 +257,7 @@ epc_wall_description_map = {
    "Timber frame, as built, partial insulation": "Timber frame as built",
    "Timber frame, as built, no insulation": "Timber frame as built",
    "Timber frame, with external insulation": "Timber frame with internal insulation",
-
+    "Timber frame, with internal insulation": "Timber frame with internal insulation",
    ############################
    # Sandstone/limestones wall mappings
    ############################
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@ -205,7 +205,7 @@ def get_wall_u_value(

        mapped_value = wall_uvalues_df[
            wall_uvalues_df["Wall_type"] == mapped_description
-        ][age_band].values[0]
+            ][age_band].values[0]

        if pd.isnull(mapped_value) and "Park home" in mapped_description:
            # We don't know enough in this case so we default to 0
@ -428,6 +428,9 @@ def estimate_number_of_floors(property_type):
    Using the property type, we estimate the number of floors in the property
    """

+    if property_type is None:
+        return None
+
    if property_type == "House":
        number_of_floors = 2
    elif property_type in ["Flat", "Bungalow"]:
@ -560,7 +563,7 @@ def get_floor_u_value(
        insulation_lookup = s11[
            s11["Age_band"].str.contains(age_band) & s11["Floor_construction"]
            == floor_type
-        ]
+            ]
        if insulation_lookup.empty:
            insulation_thickness = 0
        else:
--- a/survey_report/app.py
+++ b/survey_report/app.py
@ -0,0 +1,270 @@
+import os
+import requests
+import PyPDF2
+from string import Template
+
+import pandas as pd
+
+from survey_report.extraction.detect_report_type import detect_report_type
+from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor
+
+
+def generate_html_report(template_path, output_path, data):
+    """
+    Render an HTML report by filling a ``string.Template`` file with values.
+
+    The template is read as UTF-8, its ``$name`` / ``${name}`` placeholders are
+    substituted from ``data`` and the result is written (as UTF-8) to ``output_path``.
+    Placeholders that have no matching key are left untouched rather than raising.
+
+    Args:
+    - template_path (str): Path to the HTML template file.
+    - output_path (str): Path the rendered HTML file is written to.
+    - data (dict): Mapping of placeholder name to the value to inject.
+    """
+    with open(template_path, "r", encoding="utf-8") as template_file:
+        template_source = template_file.read()
+
+    # safe_substitute keeps unknown placeholders in place instead of raising KeyError
+    rendered = Template(template_source).safe_substitute(data)
+
+    with open(output_path, "w", encoding="utf-8") as report_file:
+        report_file.write(rendered)
+
+    print(f"HTML report generated successfully: {output_path}")
+
+
+def stringify_number(num: int, rounding: bool = True) -> str:
+    """
+    Format a number for display in the report.
+
+    Values that display with five figures or fewer are shown in full with thousands
+    separators (e.g. ``1,300``); larger values are abbreviated to whole thousands
+    (e.g. ``250k``).
+
+    :param num: The number to format.
+    :param rounding: When True, round *up* to the next 100 (below 100,000) or to the
+        next 1,000 (100,000 and above) before formatting.
+    :return: The formatted string.
+    """
+    if num < 100000:
+        value = ((num + 99) // 100) * 100 if rounding else num
+        # Rounding up can carry the value to 100,000 (e.g. 99,950); only use the full
+        # format while the displayed value still has five figures or fewer, so that
+        # 99,950 and 100,000 are both shown as "100k".
+        if value < 100000:
+            return f"{value:,}"
+    else:
+        value = ((num + 999) // 1000) * 1000 if rounding else num
+
+    return f"{value // 1000}k"
+
+
+class PlacidApi:
+    """
+    Minimal client for the Placid REST API, used to render the survey report PDF.
+
+    Only the two calls this module needs are wrapped: creating a PDF from a template
+    and downloading the rendered file once it is ready.
+    """
+
+    # Errors as defined by docs: https://placid.app/docs/2.0/rest/errors
+    ERROR_CODES = {
+        400: "Bad request",
+        401: "Unauthorized",
+        404: "Template Not found",
+        422: "Validation error",
+        429: "Rate limit exceeded",
+        500: "Internal server error",
+    }
+
+    # Seconds to wait on a single HTTP request before giving up, so a stalled
+    # connection cannot hang the caller indefinitely
+    REQUEST_TIMEOUT = 30
+
+    def __init__(self, api_key):
+        """
+        :param api_key: Placid API token, sent as a bearer token on every request.
+        """
+        self.api_key = api_key
+
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+
+    def _parse_response(self, response):
+        """
+        Return the decoded JSON body of ``response``.
+
+        :raises requests.HTTPError: if the API answered with an error status code; the
+            message uses the description from ERROR_CODES when the code is a known one.
+        """
+        if response.status_code >= 400:
+            reason = self.ERROR_CODES.get(response.status_code, "Unexpected error")
+            raise requests.HTTPError(
+                f"Placid API request failed ({response.status_code}): {reason}",
+                response=response,
+            )
+        return response.json()
+
+    def create_pdf(
+        self,
+        template_uuid: str,
+        current_epc_rating: str,
+        current_epc_rating_colour: str,
+        post_retrofit_epc_rating: str,
+        post_retrofit_epc_rating_colour: str,
+    ):
+        """
+        Queue the generation of a single-page PDF from a Placid template.
+
+        :param template_uuid: UUID of the Placid template to render.
+        :param current_epc_rating: Text for the ``current_epc_rating`` layer.
+        :param current_epc_rating_colour: Text colour for the ``current_epc_rating`` layer.
+        :param post_retrofit_epc_rating: Text for the ``post_retrofit_epc_rating`` layer.
+        :param post_retrofit_epc_rating_colour: Text colour for the ``post_retrofit_epc_rating`` layer.
+        :return: The decoded JSON response; its ``id`` is what get_pdf expects.
+        :raises requests.HTTPError: if the API rejects the request.
+        """
+        url = "https://api.placid.app/api/rest/pdfs"
+
+        body = {
+            "webhook_success": None,
+            "passthrough": None,
+            "pages": [
+                {
+                    "template_uuid": template_uuid,
+                    "layers": {
+                        "current_epc_rating": {
+                            "text": current_epc_rating,
+                            "text_color": current_epc_rating_colour,
+                        },
+                        "post_retrofit_epc_rating": {
+                            "text": post_retrofit_epc_rating,
+                            "text_color": post_retrofit_epc_rating_colour,
+                        }
+                    },
+                },
+            ]
+        }
+
+        response = requests.post(
+            url,
+            headers=self.headers,
+            json=body,
+            timeout=self.REQUEST_TIMEOUT,
+        )
+
+        return self._parse_response(response)
+
+    def get_pdf(
+        self,
+        pdf_id: str,
+        output_path: str = "survey_report/example_data/output.pdf",
+        poll_interval: float = 5,
+        max_attempts: int = 60,
+    ):
+        """
+        Poll the API every ``poll_interval`` seconds until the PDF is ready, then download it.
+
+        A freshly created PDF is returned as queued with no ``pdf_url``, so the status
+        endpoint has to be polled until the URL is populated.
+
+        :param pdf_id: Identifier returned by create_pdf.
+        :param output_path: Local path the PDF is written to.
+        :param poll_interval: Seconds to wait between status checks.
+        :param max_attempts: Maximum number of status checks before giving up.
+        :return: ``output_path``, once the file has been written.
+        :raises requests.HTTPError: if a request to the API or the download fails.
+        :raises RuntimeError: if Placid reports that rendering failed.
+        :raises TimeoutError: if the PDF is not ready after ``max_attempts`` checks.
+        """
+        import time
+
+        url = f"https://api.placid.app/api/rest/pdfs/{pdf_id}"
+
+        pdf_url = None
+        for attempt in range(max_attempts):
+            response = requests.get(
+                url,
+                headers=self.headers,
+                timeout=self.REQUEST_TIMEOUT,
+            )
+            response_body = self._parse_response(response)
+
+            # Placid reports the job status as queued / finished / error
+            if response_body.get("status") == "error":
+                raise RuntimeError(f"Placid failed to render PDF {pdf_id}")
+
+            pdf_url = response_body.get("pdf_url")
+            if pdf_url:
+                break
+
+            # No point sleeping after the final check
+            if attempt < max_attempts - 1:
+                time.sleep(poll_interval)
+
+        if not pdf_url:
+            raise TimeoutError(f"PDF {pdf_id} was not ready after {max_attempts} checks")
+
+        # Download the PDF from this url
+        pdf_download = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
+        pdf_download.raise_for_status()
+        with open(output_path, "wb") as f:
+            f.write(pdf_download.content)
+
+        return output_path
+
+
+def handler():
+    """
+    Performs the data extraction process for the survey report.
+
+    For each configured property this reads the pre-retrofit site notes, the EPR and the
+    post-retrofit (scenario) site notes, derives the valuation uplift, bill savings and
+    carbon savings, and is intended to render a PDF through Placid before writing an
+    HTML report.
+
+    NOTE(review): this is still a work in progress. The payback period calculation is not
+    implemented, so the function raises NotImplementedError while processing the first
+    property and nothing after that guard runs yet. All input/output locations are
+    developer-machine placeholders.
+
+    :raises ValueError: if PLACID_API_KEY is not set, or a required report is not recognised
+    :raises NotImplementedError: until the payback period calculation is implemented
+    :return: None
+    """
+    # Kept as a function-scope import so the valuation module is only loaded when the
+    # handler actually runs
+    from backend.ml_models.Valuation import PropertyValuation
+
+    # The Placid key is read from the environment rather than being committed to source control.
+    # NOTE(review): the key that used to be hard-coded here should be treated as leaked and rotated.
+    placid_api_key = os.environ.get("PLACID_API_KEY")
+    if not placid_api_key:
+        raise ValueError("PLACID_API_KEY environment variable is not set")
+
+    TEMPLATE_UUID = "5bst9mh1q9lk9"
+    placid_api = PlacidApi(placid_api_key)
+
+    current_property_value = 250000  # Needs to be an input
+
+    EPC_COLOURS = {
+        "A": "#117d58",
+        "B": "#2da55c",
+        "C": "#8dbd40",
+        "D": "#f7cd14",
+        "E": "#f3a96a",
+        "F": "#ef8026",
+        "G": "#e41e3b",
+    }
+
+    folders = [
+        {
+            "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 "
+                          "WILLIS ROAD FLAT 1 PRE EPR SITE NOTES.pdf",
+            "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 WILLIS "
+                   "ROAD FLAT 1 PRE EPR PDF.pdf",
+            "scenario_site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data"
+                                   "/Flat 1/3 WILLIS ROAD FLAT 1 POST EPR SITE NOTES.pdf"
+        },
+        {
+            "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 "
+                          "WILLIS ROAD FLAT 2 PRE EPR SITE NOTES.pdf",
+            "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 WILLIS "
+                   "ROAD FLAT 2 PRE EPR PDF.pdf",
+            "scenario_site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data"
+                                   "/Flat 2/3 WILLIS ROAD FLAT 2 POST EPR SITE NOTES.pdf"
+        },
+        {
+            "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 "
+                          "WILLIS ROAD FLAT 3 PRE EPR SITE NOTES.pdf",
+            "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 WILLIS "
+                   "ROAD FLAT 3 PRE EPR PDF.pdf",
+            "scenario_site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data"
+                                   "/Flat 3/3 WILLIS ROAD FLAT 3 POST EPR SITE NOTES.pdf"
+        },
+    ]
+
+    data = []
+    for data_config in folders:
+
+        file_mapping = {}
+        for filename, filepath in data_config.items():
+            with open(filepath, "rb") as f:
+                pdf = PyPDF2.PdfReader(f)
+                # Extract each page once; the first page is also used to detect the report type
+                page_texts = [page.extract_text() for page in pdf.pages]
+
+            # Only keep documents whose first page identifies a supported report type
+            if detect_report_type(page_texts[0]) is not None:
+                file_mapping[filename] = "".join(page_texts)
+
+        # Every configured document is required below, so fail with a clear message rather
+        # than a bare KeyError when one of them was not recognised
+        missing = [name for name in data_config if name not in file_mapping]
+        if missing:
+            raise ValueError(f"Unrecognised or unsupported report type for: {', '.join(missing)}")
+
+        # This is only set up to work with quido site notes so we must have it
+        site_notes_extractor = SiteNotesExtractor(file_mapping["site_notes"])
+        site_notes = site_notes_extractor.extract_all()
+
+        # We also must have an EPR
+        epr_extractor = EPRExtractor(file_mapping["epr"])
+        epr = epr_extractor.extract_all()
+
+        # Valuation simulation
+        scenario_site_notes_extractor = SiteNotesExtractor(file_mapping["scenario_site_notes"])
+        scenario_site_notes = scenario_site_notes_extractor.extract_all()
+
+        valuation_uplift = PropertyValuation.estimate_valuation_improvement(
+            current_value=current_property_value,
+            current_epc=site_notes["Current EPC Band"],
+            target_epc=scenario_site_notes["Current EPC Band"],
+        )
+        # TODO - should convert this, when it's more than 5 figures and we should certainly stringify this
+
+        valuation_difference = round(valuation_uplift["average_increased_value"] - current_property_value)
+
+        # Prepare the data for output
+        bill_savings = round(
+            site_notes['Estimated Annual Energy Cost (£)'] - scenario_site_notes['Estimated Annual Energy Cost (£)']
+        )
+
+        carbon_savings = round(
+            site_notes["Current Carbon Emissions (TCO2)"] - scenario_site_notes["Current Carbon Emissions (TCO2)"],
+            2
+        )
+
+        # Deliberate work-in-progress guard: the payback period is not calculated yet, so
+        # processing stops here until it is implemented
+        payback_period = None
+        if payback_period is None:
+            raise NotImplementedError("Implement me")
+
+        # We extract the measures from the site notes
+
+        report_data = {
+            "current_epc_rating": site_notes["Current EPC Band"],
+            "current_epc_rating_colour": EPC_COLOURS[site_notes["Current EPC Band"]],
+            "post_retrofit_epc_rating": scenario_site_notes["Current EPC Band"],
+            "post_retrofit_epc_rating_colour": EPC_COLOURS[scenario_site_notes["Current EPC Band"]],
+            "bill_savings": stringify_number(bill_savings),
+            "valuation_improvement": stringify_number(valuation_difference),
+            "carbon_savings": carbon_savings,
+
+        }
+
+        # We now produce the combined data sheet which is the starting figure:
+        data_sheet = {**epr, **site_notes}
+        del data_sheet['Building Dimensions']
+        # We unnest the Total Building Dimensions
+        data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
+        data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
+        del data_sheet["Total Building Dimensions"]
+        data.append(data_sheet)
+
+        # create_pdf only accepts the EPC rating layers, so the savings figures in report_data
+        # are not passed on; unpacking the whole dict would raise a TypeError
+        create_pdf_response = placid_api.create_pdf(
+            template_uuid=TEMPLATE_UUID,
+            current_epc_rating=report_data["current_epc_rating"],
+            current_epc_rating_colour=report_data["current_epc_rating_colour"],
+            post_retrofit_epc_rating=report_data["post_retrofit_epc_rating"],
+            post_retrofit_epc_rating_colour=report_data["post_retrofit_epc_rating_colour"],
+        )
+        # {'id': 769832, 'type': 'pdf', 'status': 'queued', 'pdf_url': None, 'transfer_url': None, 'passthrough': None}
+        # Download locally
+        placid_api.get_pdf(create_pdf_response["id"])
+
+    # NOTE(review): the HTML report below is built from the data sheet of the last property only
+    data_sheet = data[-1]
+    data = pd.DataFrame(data)  # combined data sheet for all properties; not consumed yet
+
+    # Generate the HTML report
+    # Placeholder locations
+    template_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/template.html"
+    output_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/output/report.html"
+    logo_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/assets/logo.png"
+    generate_html_report(
+        template_path, output_path,
+        data={
+            "address": data_sheet["Address"],
+            "logo_path": logo_path,
+            "current_epc": data_sheet["Current EPC Band"],
+            "current_sap": data_sheet["Current SAP Rating"],
+            "potential_epc": "A",  # TODO PLACEHOLDER
+            "potential_sap": 91,  # TODO PLACEHOLDER
+        }
+    )
--- a/survey_report/extraction/detect_report_type.py
+++ b/survey_report/extraction/detect_report_type.py
@ -0,0 +1,22 @@
+import re
+
+
+def detect_report_type(first_page):
+    """
+    Identify which kind of report a PDF is from the text of its first page.
+
+    :param first_page: Extracted text of the first page of the report.
+    :return: "quidos_site_notes", "quidos_epr", or None when the page is not recognised.
+    """
+    # Only quidos files are handled for now. We have the Elmhurst logic, so that can be
+    # introduced when it is needed.
+    # Each entry is (matcher, pattern, label): site notes must *start* with their banner
+    # (re.match), whereas the EPR marker may appear anywhere on the page (re.search).
+    signatures = (
+        (
+            re.match,
+            r"^Created \d{2}/\d{2}/\d{4} for Quidos Ltd using Argyle software BRE approved calculator",
+            "quidos_site_notes",
+        ),
+        (
+            re.search,
+            r"\nIQ-Energy\nEnergy Performance Report\nPage 1 of 1",
+            "quidos_epr",
+        ),
+    )
+
+    for matcher, signature, report_type in signatures:
+        if matcher(signature, first_page):
+            return report_type
+
+    return None
--- a/survey_report/extraction/quidos.py
+++ b/survey_report/extraction/quidos.py
@ -0,0 +1,256 @@
+import re
+
+
+class SiteNotesExtractor:
+    """
+    Pulls the EPC band / SAP rating, carbon emissions, estimated energy cost, building
+    dimensions and wall details out of the text of an EPC summary (site notes) report.
+    """
+
+    def __init__(self, pdf_text):
+        """
+        Store the extracted PDF text and start with an empty result dictionary.
+        """
+        self.text = pdf_text
+        self.data = {}
+
+    def extract_sap_rating(self):
+        """
+        Record the current and potential EPC band and SAP rating.
+
+        Raises ValueError when the rating line is not present in the text.
+        """
+        found = re.search(
+            r"Current SAP rating\s*([A-G])\s*(\d+)\s*Potential SAP rating\s*([A-G])\s*(\d+)",
+            self.text,
+        )
+        if found is None:
+            raise ValueError("No SAP rating found in the report")
+
+        current_band, current_sap, potential_band, potential_sap = found.groups()
+        self.data["Current EPC Band"] = current_band
+        self.data["Current SAP Rating"] = int(current_sap)
+        self.data["Potential EPC Band"] = potential_band
+        self.data["Potential SAP Rating"] = int(potential_sap)
+
+    def extract_carbon_emissions(self):
+        """
+        Record the current annual carbon emissions (TCO2).
+
+        Raises ValueError when the emissions line is not present in the text.
+        """
+        found = re.search(r"Current annual emissions\s*([\d.]+)\s*\(TCO2\)", self.text)
+        if found is None:
+            raise ValueError("No carbon emissions found in the report")
+
+        self.data["Current Carbon Emissions (TCO2)"] = float(found.group(1))
+
+    def extract_building_dimensions(self):
+        """
+        Record the dimensions of every building part (Main Property and any extensions),
+        together with the total floor area and total heat loss area.
+
+        Raises ValueError when the dimensions table, or any row within it, cannot be found.
+        """
+        # The table sits between its column header row and the "5.0 Conservatory" heading
+        section = re.search(
+            r"Dimension Type (?:internal|external)\nPart Floor Area \(m2\) Room Height \(m\) Loss Perimeter \(m\) "
+            r"Party Wall "
+            r"Length \(m\)\n"
+            r"(.*?)\n5\.0 Conservatory", self.text, re.DOTALL
+        )
+        if section is None:
+            raise ValueError("Failed to locate the dimensions section in the text.")
+
+        # One row per building part: name followed by four numeric columns
+        row_regex = re.compile(
+            r"(Main Property|Extension \d+)\s*(?:Property)?\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
+        )
+
+        building_parts = []
+        for row in row_regex.finditer(section.group(1)):
+            name, floor_area, room_height, loss_perimeter, party_wall = row.groups()
+            part = {
+                "Building Part": name.strip(),
+                "Part Floor Area (m2)": float(floor_area),
+                "Room Height (m)": float(room_height),
+                "Loss Perimeter (m)": float(loss_perimeter),
+                "Party Wall Length (m)": float(party_wall),
+            }
+            # Heat loss area is the exposed perimeter multiplied by the room height
+            part["Heat Loss Area (m2)"] = part["Loss Perimeter (m)"] * part["Room Height (m)"]
+            building_parts.append(part)
+
+        if len(building_parts) == 0:
+            raise ValueError("No building dimensions found in the report")
+
+        self.data["Building Dimensions"] = building_parts
+
+        # Totals across all building parts
+        total_floor_area = sum(part["Part Floor Area (m2)"] for part in building_parts)
+        total_heat_loss_area = sum(part["Heat Loss Area (m2)"] for part in building_parts)
+        self.data["Total Building Dimensions"] = {
+            "floor_area": total_floor_area,
+            "heat_loss_area": total_heat_loss_area,
+        }
+
+    def extract_bills_estimate(self):
+        """
+        Record the estimated annual energy cost (£), ignoring thousands separators.
+
+        Raises ValueError when the cost line is not present in the text.
+        """
+        found = re.search(r"Current annual energy costs £\s*([\d,.]+)", self.text)
+        if found is None:
+            raise ValueError("No bills estimate found in the report")
+
+        amount = found.group(1).replace(",", "")
+        self.data["Estimated Annual Energy Cost (£)"] = float(amount)
+
+    def extract_all(self):
+        """
+        Run the SAP rating, carbon emissions, bills and dimensions extractions (in that
+        order) and return the dictionary of extracted data.
+        """
+        steps = (
+            self.extract_sap_rating,
+            self.extract_carbon_emissions,
+            self.extract_bills_estimate,
+            self.extract_building_dimensions,
+        )
+        for step in steps:
+            step()
+
+        # Specific measures still to be extracted:
+        # Primary wall
+        # Secondary wall
+        # Roof
+        # Floor
+        # Heating system
+        # Hot water system
+        # Windows
+        # Doors
+        # Lighting
+        # Ventilation
+        # Solar
+
+        return self.data
+
+    def extract_walls(self):
+        """
+        Return one dictionary per building part found in the "7.0 Walls" section, holding
+        the wall construction, insulation, insulation thickness, whether the thickness was
+        measured and the wall thickness.
+
+        Fields missing from the text are reported as "Unknown" (or None for the numeric
+        wall thickness). The alternative wall fields are always None for now.
+
+        Raises ValueError when the walls section cannot be found.
+        """
+        # The walls section runs from its heading up to the "8.0 Roofs" heading
+        section = re.search(r"7\.0 Walls\n(.*?)\n8\.0 Roofs", self.text, re.DOTALL)
+        if section is None:
+            raise ValueError("Failed to locate the walls section in the text.")
+
+        # A building part heading followed by its (individually optional) attribute lines
+        wall_regex = re.compile(
+            r"(?P<section>Main Property(?: Alternative)?|Extension \d+)\s*\n"
+            r"(?:Construction\s*(?P<construction>[^\n]*)\n)?"
+            r"(?:Insulation\s*(?P<insulation>[^\n]*)\n)?"
+            r"(?:Insulation Thickness\(mm\)\s*(?P<insulation_thickness>[^\n]*)\n)?"
+            r"(?:Wall Thickness Measured\?\s*(?P<thickness_measured>[^\n]*)\n)?"
+            r"(?:Wall Thickness\(mm\)\s*(?P<thickness>\d+))?",
+            re.MULTILINE
+        )
+
+        # TODO: We aren't effectively picking up alternative walls
+        # alt_wall_pattern = re.compile(
+        #     r"Alternative Wall Sheltered\s*.*?\n"
+        #     r".*?Construction\s*(?P<alt_construction>[^\n]*)\n"
+        #     r"Insulation\s*(?P<alt_insulation>[^\n]*)\n"
+        #     r"Insulation Thickness\(mm\)\s*(?P<alt_insulation_thickness>[^\n]*)\n"
+        #     r"Wall Thickness Measured\?\s*(?P<alt_thickness_measured>[^\n]*)\n"
+        #     r"Wall Thickness\(mm\)\s*(?P<alt_thickness>\d+)?",
+        #     re.MULTILINE
+        # )
+
+        wall_data = []
+        for found in wall_regex.finditer(section.group(1)):
+            fields = found.groupdict()
+
+            # "Main Property Alternative" is reported under the plain "Main Property" name
+            part_name = fields["section"]
+            # has_alternative_wall = "Alternative" in part_name
+            if "Main Property" in part_name:
+                part_name = "Main Property"
+
+            raw_thickness = fields["thickness"]
+            if raw_thickness and raw_thickness.isdigit():
+                wall_thickness = int(raw_thickness)
+            else:
+                wall_thickness = None
+
+            wall_entry = {
+                "Building Part": part_name,
+                "Wall Type": fields["construction"] or "Unknown",
+                "Wall Insulation": fields["insulation"] or "Unknown",
+                "Insulation Thickness (mm)": fields["insulation_thickness"] or "Unknown",
+                "Wall Thickness Measured": fields["thickness_measured"] or "Unknown",
+                "Wall Thickness (mm)": wall_thickness,
+                "Alternative Wall Type": None,
+                "Alternative Wall Insulation": None,
+                "Alternative Insulation Thickness (mm)": None,
+                "Alternative Wall Thickness Measured": None,
+                "Alternative Wall Thickness (mm)": None,
+            }
+
+            # Check if an alternative wall section exists
+            # if has_alternative_wall:
+            #     alt_match = alt_wall_pattern.search(wall_section, match.end())
+            #     if alt_match:
+            #         wall_entry["Alternative Wall Type"] = alt_match.group("alt_construction") or "Unknown"
+            #         wall_entry["Alternative Wall Insulation"] = alt_match.group("alt_insulation") or "Unknown"
+            #         wall_entry["Alternative Insulation Thickness (mm)"] = alt_match.group(
+            #             "alt_insulation_thickness") or "Unknown"
+            #         wall_entry["Alternative Wall Thickness Measured"] = alt_match.group(
+            #             "alt_thickness_measured") or "Unknown"
+            #         wall_entry["Alternative Wall Thickness (mm)"] = int(
+            #             alt_match.group("alt_thickness")) if alt_match.group("alt_thickness") and alt_match.group(
+            #             "alt_thickness").isdigit() else None
+
+            wall_data.append(wall_entry)
+
+        return wall_data
+
+
+class EPRExtractor:
+    """
+    Pulls the address and the space / water heating consumption out of the text of an
+    Energy Performance Report (EPR).
+    """
+
+    def __init__(self, pdf_text):
+        """
+        Store the extracted PDF text and start with an empty result dictionary.
+        """
+        self.text = pdf_text
+        self.data = {}
+
+    def extract_heating_consumption(self):
+        """
+        Record the space heating and water heating figures (KWH) as integers,
+        ignoring thousands separators.
+
+        Raises ValueError when the heating lines are not present in the text.
+        """
+        found = re.search(
+            r"Space Heating\(KWH\)\s*([\d,]+).*?\nWater Heating\(KWH\)\s*([\d,]+)",
+            self.text,
+            re.DOTALL
+        )
+        if found is None:
+            raise ValueError("No heating data found in the report")
+
+        space_kwh, water_kwh = (int(raw.replace(",", "")) for raw in found.groups())
+        self.data["Space Heating (KWH)"] = space_kwh
+        self.data["Water Heating (KWH)"] = water_kwh
+
+    def extract_address(self):
+        """
+        Record the text found between the "Address" and "Town" labels as the address.
+
+        Raises ValueError when the address block is not present in the text.
+        """
+        found = re.search(
+            r"Address\s*(.*?)\nTown\s*(.*?)\n",
+            self.text,
+            re.DOTALL
+        )
+        if found is None:
+            raise ValueError("No address found in the report")
+
+        # Only the address part is stored; the town is matched but not used
+        self.data["Address"] = found.group(1).strip()
+
+    def extract_all(self):
+        """
+        Run the address and heating extractions (in that order) and return the
+        dictionary of extracted data.
+        """
+        for step in (self.extract_address, self.extract_heating_consumption):
+            step()
+        return self.data
--- a/survey_report/requirements.txt
+++ b/survey_report/requirements.txt
--- a/survey_report/template.html
+++ b/survey_report/template.html
@ -0,0 +1,123 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Domna Energy Report</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            background-color: #ffffff;
+            color: #333;
+            margin: 0;
+            padding: 0;
+            display: flex;
+            justify-content: center;
+        }
+        .container {
+            width: 100%;
+            max-width: 1300px;
+            margin: 20px auto;
+        }
+        .header {
+            background-color: #1B1F3B;
+            color: white;
+            padding: 30px;
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            border-radius: 12px;
+        }
+        .header h1 {
+            margin: 5px; /* a non-zero length needs a unit, otherwise the declaration is ignored */
+            font-size: 24px;
+        }
+        .header p {
+            margin: 5px 0 0;
+            font-size: 16px;
+            color: #d1d5db;
+        }
+        .logo img {
+            height: 60px;
+        }
+
+        /* EPC Rating Cards */
+        .epc-container {
+            display: flex;
+            justify-content: space-between;
+            gap: 20px;
+            margin-top: 30px;
+        }
+        .epc-card {
+            background-color: white;
+            border: 2px solid #ccc;
+            border-radius: 10px;
+            padding: 20px;
+            flex: 1;
+            display: flex;
+            flex-direction: column;
+            justify-content: space-between; /* Pushes SAP to bottom */
+            align-items: center;
+            text-align: center;
+            box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
+            position: relative;
+            height: 160px;
+        }
+        .epc-title {
+            font-size: 18px;
+            font-weight: bold;
+            color: #666;
+        }
+        .epc-rating {
+            font-size: 50px;
+            font-weight: bold;
+        }
+        .sap-rating {
+            font-size: 18px;
+            color: #555;
+            position: absolute;
+            bottom: 10px;
+            right: 20px;
+        }
+        .before .epc-rating {
+            color: #1B1F3B; /* Medium Blue */
+        }
+        .after .epc-rating {
+            color: #D4AF37; /* Gold */
+        }
+
+    </style>
+</head>
+<body>
+
+    <div class="container">
+        <!-- Header Section -->
+        <div class="header">
+            <div>
+                <h1>Domna Energy Report</h1>
+                <p>${address}</p> <!-- Address Placeholder -->
+            </div>
+            <div class="logo">
+                <img src="${logo_path}" alt="Domna Logo">
+            </div>
+        </div>
+
+        <!-- EPC Rating Cards -->
+        <div class="epc-container">
+            <div class="epc-card before">
+                <div class="epc-title">Current EPC Rating</div>
+                <div class="epc-rating">${current_epc}</div>
+                <div class="sap-rating">SAP ${current_sap}</div>
+            </div>
+
+            <div class="epc-card after">
+                <div class="epc-title">Potential EPC Rating</div>
+                <div class="epc-rating">${potential_epc}</div>
+                <div class="sap-rating">SAP ${potential_sap}</div>
+            </div>
+        </div>
+
+    </div>
+
+</body>
+</html>
--- a/utils/OsmosisCondtionReportParser.py
+++ b/utils/OsmosisCondtionReportParser.py
@ -0,0 +1,49 @@
+import re
+import boto3
+import PyPDF2
+import fitz
+
+
+class OsmosisConditionReportParser:
+    """
+    Reads the text of an Osmosis condition report PDF and extracts selected fields from it.
+    """
+
+    def __init__(self, filekey, bucket_name=None):
+        """
+        Store the file location and read the PDF text straight away.
+
+        :param filekey: Location of the PDF; used as a local path when no bucket is given.
+        :param bucket_name: S3 bucket holding the file. Reading from S3 is not implemented yet.
+        """
+        self.s3_client = boto3.client('s3')
+        self.bucket_name = bucket_name
+        self.filekey = filekey
+        self.pdf_text = None
+
+        self._read_file()
+
+    def _read_file(self):
+        """
+        Reads the PDF and stores the concatenated text of all its pages in ``self.pdf_text``.
+
+        Raises:
+            NotImplementedError: If a bucket name was supplied (S3 is not supported yet).
+            ValueError: If the file cannot be found, read, or parsed.
+        """
+        if self.bucket_name:
+            # Read from S3
+            # Raised outside the try block so it is not re-wrapped as a misleading ValueError
+            raise NotImplementedError("Imeplement me")
+
+        try:
+            with fitz.open(self.filekey) as pdf:
+                text = "".join(page.get_text() for page in pdf)
+        except FileNotFoundError as e:
+            raise ValueError(f"Local file not found: {self.filekey}") from e
+        except Exception as e:
+            raise ValueError(f"An error occurred while reading or parsing the PDF: {e}") from e
+
+        self.pdf_text = text
+
+    def _find(self, pattern, label):
+        """
+        Return the first capture group of ``pattern`` in the PDF text.
+
+        :raises ValueError: naming ``label`` when the pattern does not match.
+        """
+        match = re.search(pattern, self.pdf_text or "")
+        if match is None:
+            raise ValueError(f"{label} not found in the condition report")
+        return match.group(1)
+
+    def extract(self):
+        """
+        Extract the number of bedrooms and the risk assessment pathway from the report text.
+
+        :return: Dictionary with "No. of Bedrooms" (int) and "Risk Assessment Pathway"
+            (a single upper-case letter).
+        :raises ValueError: if either field cannot be found in the text.
+        """
+        return {
+            "No. of Bedrooms": int(self._find(r"No\. of Bedrooms \(Total\)\s*(\d+)", "No. of Bedrooms")),
+            "Risk Assessment Pathway": self._find(
+                r"Risk\s*Assessment\s*Pathway\s*([A-Z])", "Risk Assessment Pathway"
+            )
+        }
--- a/utils/file_data_extraction.py
+++ b/utils/file_data_extraction.py
--- a/Show more
+++ b/Show more