diff --git a/.idea/terraform.xml b/.idea/terraform.xml
new file mode 100644
index 00000000..cd46a3d3
--- /dev/null
+++ b/.idea/terraform.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="TerraformProjectSettings">
+    <option name="toolPath" value="/opt/homebrew/bin/terraform" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
new file mode 100644
index 00000000..306edd99
--- /dev/null
+++ b/asset_list/AssetList.py
@@ -0,0 +1,1518 @@
+import hashlib
+import os
+import re
+import tiktoken
+from pprint import pprint
+from datetime import datetime
+from openai import OpenAI
+import numpy as np
+import pandas as pd
+from fuzzywuzzy import process
+from utils.logger import setup_logger
+from backend.SearchEpc import SearchEpc
+from BaseUtility import Definitions
+import asset_list.mappings.property_type as property_type_mappings
+import asset_list.mappings.walls as walls_mappings
+import asset_list.mappings.heating_systems as heating_mappings
+import asset_list.mappings.exising_pv as existing_pv_mappings
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+logger = setup_logger()
+
+# OpenAI API Key (set this in your environment variables for security)
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+
+
+class DataRemapper:
+    """Standardise raw values via a predefined map, exact/fuzzy matching, then one batched OpenAI call."""
+
+    def __init__(self, standard_values, standard_map=None, max_tokens=1000):
+        """
+        Initialize the remapper with standard values and a predefined mapping.
+
+        :param standard_values: Set of allowed standardized values.
+        :param standard_map: Dictionary of common remappings {raw_value: standard_value}.
+        :param max_tokens: Upper bound on prompt tokens, also passed as the completion limit.
+        """
+        self.standard_values = standard_values
+        # An omitted map becomes {} so the membership tests in standardize_list cannot fail on None
+        self.standard_map = {} if standard_map is None else standard_map
+        self.fuzzy_threshold = 90  # Adjust fuzzy matching sensitivity
+        self.ai_model = "gpt-4-turbo"  # Use gpt-3.5-turbo for cheaper processing
+
+        # Tokenizer for counting tokens
+        self.tokenizer = tiktoken.encoding_for_model(self.ai_model)
+
+        # Track token usage and remap dictionary
+        self.total_tokens_used = 0
+        self.total_cost = 0
+        self.remap_dict = {}  # {original_value: standardized_value}
+        self.max_tokens = max_tokens  # Limit for OpenAI API
+
+        # Memoization for AI calls
+        self.ai_cache = {}  # {tuple(unmapped_values): {original_value: standardized_value}}
+        # Capture the response for debugging
+        self.ai_response = None
+
+        # OpenAI pricing (as of Feb 2024)
+        self.pricing = {
+            "gpt-4-turbo": {"input": 0.01 / 1000, "output": 0.03 / 1000},
+            "gpt-3.5-turbo": {"input": 0.0015 / 1000, "output": 0.002 / 1000},
+        }
+
+        self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
+
+    @staticmethod
+    def clean_string(text):
+        """Basic text cleaning: remove extra spaces, punctuation, and normalize case."""
+        if not isinstance(text, str):
+            return None
+        text = re.sub(r'[^\w\s]', '', text.strip().lower())  # Remove punctuation
+        return re.sub(r'\s+', ' ', text)  # Collapse runs of whitespace into one space
+
+    def fuzzy_match(self, text):
+        """Use fuzzy matching to find the closest standard value."""
+        result = process.extractOne(text, self.standard_values) if text else None
+        # extractOne gives None when there is nothing to choose from, so guard before reading the score
+        return result[0] if result and result[1] >= self.fuzzy_threshold else None
+
+    def count_tokens(self, text):
+        """Estimate the number of tokens in a given text."""
+        return len(self.tokenizer.encode(text)) if text else 0
+
+    def ai_standardize(self, unmapped_values):
+        """Call OpenAI API **once** for all unmapped values to minimize cost, with memoization."""
+        import json  # Local import: only needed here, to parse the model's reply
+
+        if not unmapped_values:
+            return {}
+
+        # Sorted so the memoization key is order-independent; key=str keeps mixed types sortable
+        unmapped_tuple = tuple(sorted(unmapped_values, key=str))
+        if unmapped_tuple in self.ai_cache:
+            return self.ai_cache[unmapped_tuple]  # Return memoized result
+
+        prompt = f"""
+        You are an expert in data classification. Standardize each of these values into one of the categories: 
+        {list(self.standard_values)}. 
+
+        Return only a JSON dictionary where:
+        - The keys are the original values.
+        - The values are the standardized ones.
+
+        Strictly return JSON **without markdown formatting** or extra text.
+
+        Example Output:
+        {{
+            "BLKHOUS": "block house",
+            "BEDSIT": "bedsit"
+        }}
+
+        Values to standardize:
+        {unmapped_values}
+        """
+
+        # Count input tokens
+        input_tokens = self.count_tokens(prompt)
+        if input_tokens > self.max_tokens:
+            raise ValueError("Input tokens exceed the maximum limit.")
+
+        logger.info("Calling OpenAI API for standardization...")
+        response = self.openai_client.chat.completions.create(
+            model=self.ai_model,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=self.max_tokens,
+            temperature=0.1,
+        )
+
+        # The SDK types message content as optional, so treat a missing reply as empty text
+        output_text = (response.choices[0].message.content or "").strip()
+        output_tokens = self.count_tokens(output_text)  # Count output tokens
+
+        # Track total token usage and the estimated cost
+        self.total_tokens_used += input_tokens + output_tokens
+        rates = self.pricing[self.ai_model]
+        self.total_cost += input_tokens * rates["input"] + output_tokens * rates["output"]
+
+        try:
+            # json.loads rather than eval: the reply is untrusted text and must never be executed
+            mapping = json.loads(output_text)
+            if not isinstance(mapping, dict):
+                raise ValueError("AI response is not a JSON object")
+        except ValueError:  # Also covers json.JSONDecodeError, which subclasses ValueError
+            logger.warning("Could not parse the AI response as JSON - mapping values to 'unknown'")
+            mapping = {val: "unknown" for val in unmapped_values}  # Fallback
+
+        # Memoize the AI response, and keep the raw reply text for debugging
+        self.ai_cache[unmapped_tuple] = mapping
+        logger.debug("AI Response: %s", mapping)
+        self.ai_response = output_text
+
+        return mapping
+
+    def standardize_list(self, values_to_remap):
+        """
+        Standardizes a list of values and returns a dictionary {original_value: standardized_value}.
+
+        :param values_to_remap: List of raw values to standardize.
+        :return: Dictionary {original_value: standardized_value}.
+        """
+        unmapped_values = []
+        for value in set(values_to_remap):  # Process only unique values
+            if pd.isna(value):  # Handle NaN values
+                self.remap_dict[value] = "unknown"
+                continue
+
+            cleaned_value = self.clean_string(value)
+
+            # Rule-Based Check (Predefined Mapping): try the cleaned, raw, then lower-cased form
+            candidates = [cleaned_value, value]
+            if isinstance(value, str):  # Non-string values (e.g. numbers) have no .lower()
+                candidates.append(value.lower())
+            matched_keys = [key for key in candidates if key in self.standard_map]
+            if matched_keys:
+                self.remap_dict[value] = self.standard_map[matched_keys[0]]
+                continue
+
+            # Exact Match in Standard Values
+            if cleaned_value in self.standard_values:
+                self.remap_dict[value] = cleaned_value
+                continue
+
+            # Fuzzy Matching
+            fuzzy_match = self.fuzzy_match(cleaned_value)
+            if fuzzy_match:
+                self.remap_dict[value] = fuzzy_match
+                continue
+
+            # Capture anything that wasn't mapped
+            unmapped_values.append(value)
+
+        # AI Model - remap anything unmapped (batch request)
+        self.remap_dict.update(self.ai_standardize(unmapped_values))
+
+        return self.remap_dict
+
+    def report_usage(self):
+        """Prints a summary of token usage and cost."""
+        print(f"\n🔹 Total Tokens Used: {self.total_tokens_used}")
+        print(f"💰 Estimated Cost: ${self.total_cost:.4f}")
+
+
+class AssetList:
+    """
+    This class is used to standardise asset lists so that we can process the core information in a consistent manner.
+    """
+
+    EPC_API_DATA_NAMES = {
+        "uprn": "epc_os_uprn",
+        "address1": "epc_address1",
+        "address": "epc_address",
+        "postcode": "epc_postcode",
+        "inspection-date": "epc_inspection_date",
+        "current-energy-efficiency": "epc_sap_score_on_register",
+        "current-energy-rating": "epc_rating_on_register",
+        "property-type": "epc_property_type",
+        "built-form": "epc_archetype",
+        "total-floor-area": "epc_total_floor_area",
+        "construction-age-band": "epc_age_band",
+        "floor-height": "epc_floor_height",
+        "number-habitable-rooms": "epc_number_habitable_rooms",
+        "walls-description": "epc_wall_construction",
+        "roof-description": "epc_roof_construction",
+        "floor-description": "epc_floor_construction",
+        "mainheat-description": "epc_heating_type",
+        'mainheatcont-description': "epc_heating_controls",
+        "secondheat-description": "epc_secondary_heating",
+        "transaction-type": "epc_reason",
+        "energy-consumption-current": "epc_heat_demand",
+        "photo-supply": "epc_photo_supply",
+        "estimated": "estimated"
+    }
+    FIND_EPC_DATA_NAMES = {
+        "heating_text": "epc_estiamted_heating_kwh",
+        "hot_water_text": "epc_estimated_hotwater_kwh",
+        'Assessor’s name': "epc_assessor_name",
+        "Assessor's Telephone": "epc_assessor_telephone",
+        "Assessor's Email": "epc_assessor_email",
+        "Accreditation scheme": "epc_assessor_accreditation",
+        "Assessor’s ID": "epc_assessor_id",
+        "Solar photovoltaics": "epc_solar_pv"
+    }
+
+    DATETIME_REMAP = {
+        "Pre 1900": datetime(year=1899, month=12, day=31),
+    }
+
+    # These are the accepted methods we have for cleaning the address1 column
+    ADDRESS_1_CLEANING_METHODS = [
+        "first_two_words",  # This method will split on the fist two words, where the separator is a space
+        "first_word",  # This method will split on the first word, where the separator is a space
+        "house_number_extraction",  # This method will use the NLP model in SearchEPC to extract the housenumber
+        # "address1_extraction"  # This method will use the NLP model to extract address1
+    ]
+
+    # Standard column Names
+    STANDARD_ADDRESS_1 = "domna_address_1"
+    STANDARD_POSTCODE = "domna_postcode"
+    STANDARD_FULL_ADDRESS = "domna_full_address"
+    STANDARD_YEAR_BUILT = "landlord_year_built"
+    STANDARD_UPRN = "ordnance_survey_uprn"
+    STANDARD_LANDLORD_PROPERTY_ID = "landlord_property_id"
+    STANDARD_PROPERTY_TYPE = "landlord_property_type"
+    STANDARD_WALL_CONSTRUCTION = "landlord_wall_construction"
+    STANDARD_HEATING_SYSTEM = "landlord_heating_system"
+    STANDARD_EXISTING_PV = "landlord_existing_pv"
+
+    DOMNA_PROPERTY_ID = "domna_property_id"
+
+    # Regular expression for identifying if the address might point to multiple units
+    MULTI_UNIT_REGEX = re.compile(r'\b([A-Za-z0-9]+)-([A-Za-z0-9]+)\b')
+
+    # List of columns relating to the non-intrusive data
+    NON_INTRUSIVES_COLNAMES = [
+        "Archetype", "Construction", "Insulated", "Material", "CIGA Check Required",
+        "PV, ACCESS ISSUE, SEE NOTES", "OFF GAS - ROOF ORIENTATION",
+        "Any further surveyor notes", 'Surveyors Name'
+    ]
+
+    # This SAP threshold is a key search criteria for properties that may be eligible for extraction
+    FILLED_CAVITY_SAP_THRESHOLD = 75
+    # SAP threshold applied to empty cavities (original comment was left unfinished - TODO confirm its intended use)
+    EMPTY_CAVITY_SAP_THRESHOLD = 75
+    # Any EPC deemed to have been conducted prior to this year is deemed to be unreliable
+    EPC_YEAR_THRESHOLD = pd.Timestamp.now().year - 5
+
+    # Attributes - these are columns that we produce, calculated based on other pieces of data
+    ATTRIBUTE_HAS_SOLAR = "attribute_has_solar"
+    ATTRIBUTE_NUMBER_OF_FLOORS = "attribute_est_number_floors"
+    ATTRIBUTE_ESTIMATED_PERIMETER = "attribute_est_perimter"
+    ATTRIBUTE_HEAT_LOSS_AREA = "attribute_heat_loss_area"
+    ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS = "attribute_epc_roof_insulation_thickness"
+    ATTRIBUTE_SAP_THRESHOLD_AND_BELOW = f"sap_rating_{FILLED_CAVITY_SAP_THRESHOLD}_and_below"
+    ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD = f"epc_is_pre_{EPC_YEAR_THRESHOLD}"
+
+    # These are the descriptions that we look for in the EPC data that are indicative of no insulation
+    EPC_NO_WALL_INSULATION_DESCRIPTIONS = [
+        "cavity wall, as built, no insulation (assumed)",
+        "cavity wall, as built, partial insulation (assumed)",
+        "cavity wall, as built, partial insulation",
+        "cavity wall, as built, no insulation",
+    ]
+
+    # List of strings that we look for in the EPC data, where substrings indicate that the wall is insulated
+    EPC_INSULATED_WALLS_SUBSTRINGS = [
+        ", insulated", "with external insulation", "with internal insulation", "filled cavity"
+    ]
+
+    # List of strings that we look for in the EPC data, where substrings indicate that the roof is insulated
+    EPC_INSULATED_ROOF_SUBSTRINGS = [
+        "(another dwelling above)", ", insulated", ", insulated (assumed) ",
+        ", ceiling insulated",
+    ]
+
+    def __init__(
+        self,
+        local_filepath,
+        sheet_name,
+        address1_colname,
+        postcode_colname,
+        full_address_colname,
+        landlord_property_id=None,
+        full_address_cols_to_concat=None,
+        missing_postcodes_method=None,
+        address1_extraction_method=None,
+        landlord_year_built=None,
+        landlord_uprn=None,
+        landlord_property_type=None,
+        landlord_wall_construction=None,
+        landlord_heating_system=None,
+        landlord_existing_pv=None,
+        header=0
+    ):
+        """
+        Load the landlord's spreadsheet and record which of its columns hold each piece of data.
+
+        The `*_colname` and `landlord_*` arguments are column names in the sheet (they may be None);
+        `sheet_name` and `header` are passed straight to `pd.read_excel`.
+        """
+        self.local_filepath = local_filepath
+        self.sheet_name = sheet_name
+        # Read in the data, keeping the raw sheet and working on a copy
+        self.raw_asset_list = pd.read_excel(local_filepath, header=header, sheet_name=sheet_name)
+        self.standardised_asset_list = self.raw_asset_list.copy()
+
+        # Aggregated figures against the various work types, plus other state that is filled in later
+        self.work_type_figures = {}
+        self.work_type_breakdowns = {}
+        self.flat_data = self.duplicated_addresses = None
+        self.variable_mappings, self.rename_map = {}, {}
+        self.keep_variables = []
+        self.debug_information = dict.fromkeys(
+            ("property_type", "wall_construction", "heating_system", "existing_pv")
+        )
+
+        # The non-intrusive columns are detected through a single marker column
+        self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns
+
+        # Names of columns
+        self.landlord_property_id = landlord_property_id
+        self.address1_colname = address1_colname
+        self.postcode_colname = postcode_colname
+        self.full_address_colname = full_address_colname
+        self.landlord_year_built = landlord_year_built
+        self.landlord_uprn = landlord_uprn
+        self.landlord_property_type = landlord_property_type
+        self.landlord_wall_construction = landlord_wall_construction
+        self.landlord_heating_system = landlord_heating_system
+        self.landlord_existing_pv = landlord_existing_pv
+
+        # Parameters for cleaning
+        self.full_address_cols_to_concat = full_address_cols_to_concat
+        self.missing_postcodes_method = missing_postcodes_method
+        self.address1_extraction_method = address1_extraction_method
+
+        # When the landlord's property ID is actually the OS UPRN, copy it into the standard UPRN column
+        # and point the landlord UPRN reference at that copy
+        id_is_uprn = landlord_property_id is not None and landlord_uprn == landlord_property_id
+        if id_is_uprn:
+            self.standardised_asset_list[self.STANDARD_UPRN] = self.standardised_asset_list[landlord_uprn].copy()
+            self.landlord_uprn = self.STANDARD_UPRN
+
+    def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"):
+        """
+        Derive address1 from the full address and store it in the `self.address1_colname` column.
+
+        :param method: one of ADDRESS_1_CLEANING_METHODS (postcode_col is only used by the house number one)
+        :return: `asset_list`, updated in place
+        """
+        if method not in self.ADDRESS_1_CLEANING_METHODS:
+            raise ValueError(f"Method {method} for producing address1 not recognized")
+
+        if method == "house_number_extraction":
+            address1 = asset_list.apply(
+                lambda row: SearchEpc.get_house_number(address=row[full_address_col], postcode=row[postcode_col]),
+                axis=1
+            )
+        else:
+            words = asset_list[full_address_col].str.split(" ")
+            address1 = words.str[:2].str.join(" ") if method == "first_two_words" else words.str[0]
+
+        asset_list[self.address1_colname] = address1
+        return asset_list
+
+    @staticmethod
+    def _address1_extraction(x):
+        """Placeholder for an address1 extraction step; not implemented, so it always returns None."""
+
+    def create_property_id(self):
+        """
+        Populate the domna property ID column with a readable, stable identifier for every row.
+
+        The full address and postcode are concatenated and normalised (punctuation and spaces removed,
+        lower-cased); the ID is that text followed by "-" and the first 12 hex characters of its SHA-256.
+        :return: None; `self.standardised_asset_list` is updated in place
+        """
+
+        def _make_hash(value):
+            """Return a cleaned form of `value` with a truncated SHA-256 digest of `value` appended."""
+            slug = re.sub(r"[^\w\s-]", "", value).replace(" ", "_").lower()
+            digest = hashlib.sha256(value.encode()).hexdigest()
+            return f"{slug}-{digest[:12]}"
+
+        frame = self.standardised_asset_list
+        combined = frame[self.full_address_colname] + frame[self.postcode_colname]
+        # Normalise before hashing so punctuation, spacing and case do not change the ID
+        normalised = (
+            combined.str.strip()
+            .str.replace(r"[^\w\s]", "", regex=True)
+            .str.replace(" ", "").str.lower()
+        )
+        frame[self.DOMNA_PROPERTY_ID] = normalised.apply(_make_hash)
+
+    @staticmethod
+    def _strip_postcode_from_full_address(full_address, postcode):
+        """Return `full_address` with every occurrence of `postcode` removed and edge commas/spaces trimmed."""
+        remainder = full_address.replace(postcode, "")
+        # Trailing commas and spaces go first, then any commas left at either end, then whitespace
+        return remainder.rstrip(", ").strip(",").strip()
+
+    @classmethod
+    def _identify_multi_address(cls, address):
+        """Return True when the part of `address` before its first comma contains a range such as 1-3."""
+        if "," not in address:
+            # Always answer with a bool: falling through here used to return None instead of False
+            return False
+        return bool(cls.MULTI_UNIT_REGEX.search(address.split(",")[0]))
+
+    @staticmethod
+    def _convert_uprn(x):
+        """
+        Convert a UPRN to an integer string (e.g. 100012345.0 -> "100012345").
+
+        :param x: uprn to convert, as a number or a string; may be null
+        :return: the integer string, or `x` unchanged when it is null, non-finite or not a digit string
+        """
+        if pd.isnull(x):
+            return x
+        # Explicit type check: np.isreal is not reliable for strings, so it is not used here
+        if isinstance(x, (int, float, np.integer, np.floating)):
+            # Non-finite floats cannot be cast to int, so they are passed through untouched
+            return str(int(x)) if np.isfinite(x) else x
+        # Digit-only strings are normalised as well, ignoring surrounding whitespace
+        if str(x).strip().isdigit():
+            return str(int(str(x).strip()))
+        return x
+
+    def init_standardise(self):
+        """
+        Clean the address columns, build the property ID and prepare (but do not apply) the value mappings.
+        :return: None - the mappings are stored in `self.variable_mappings` and printed for review
+        """
+
+        # Remove rows without a postcode
+        if self.postcode_colname is not None:
+            self.standardised_asset_list = self.standardised_asset_list.dropna(subset=[self.postcode_colname])
+
+        # We clean up potential non-breaking spaces, and double spaces
+        for col in [
+            c for c in [self.postcode_colname, self.full_address_colname, self.address1_colname] if
+            c is not None
+        ]:
+            self.standardised_asset_list[col] = self.standardised_asset_list[col].astype(str)
+            self.standardised_asset_list[col] = self.standardised_asset_list[col].str.replace('\xa0', ' ', regex=False)
+            self.standardised_asset_list[col] = self.standardised_asset_list[col].str.replace('  ', ' ', regex=False)
+
+        if self.address1_colname is None:
+            if self.address1_extraction_method is None:
+                raise ValueError("Missing address 1 - please specify an extraction method")
+            self.address1_colname = self.STANDARD_ADDRESS_1
+            # If we do not have this, we produce it
+            self.standardised_asset_list = self._extract_address1(
+                asset_list=self.standardised_asset_list,
+                full_address_col=self.full_address_colname,
+                postcode_col=self.postcode_colname,
+                method=self.address1_extraction_method
+            )
+
+        if self.full_address_colname is None:
+            if not self.full_address_cols_to_concat:
+                raise ValueError("Missing full address - please specify columns to concatenate")
+            self.full_address_colname = self.STANDARD_FULL_ADDRESS
+            self.standardised_asset_list[self.full_address_colname] = (
+                self.standardised_asset_list[self.full_address_cols_to_concat].apply(lambda x: ", ".join(x), axis=1)
+            )
+        else:
+
+            # Make sure to strip the postcode out of the full address
+            self.standardised_asset_list[self.full_address_colname] = self.standardised_asset_list.apply(
+                lambda x: self._strip_postcode_from_full_address(
+                    full_address=x[self.full_address_colname],
+                    postcode=x[self.postcode_colname]
+                ),
+                axis=1
+            )
+
+        # We create the domna property id
+        self.create_property_id()
+
+        # Clean up the UPRN column, if the landlord has provided them
+        if self.landlord_uprn is not None:
+            self.standardised_asset_list[self.landlord_uprn] = (
+                self.standardised_asset_list[self.landlord_uprn].apply(self._convert_uprn)
+            )
+
+        # We keep just the columns we care about and will work through the various columns and standardise
+        variables = [
+            self.landlord_property_id,
+            self.DOMNA_PROPERTY_ID,
+            self.address1_colname,
+            self.postcode_colname,
+            self.full_address_colname,
+            self.landlord_uprn,
+            self.landlord_property_type,
+            self.landlord_year_built,
+            self.landlord_wall_construction,
+            self.landlord_heating_system,
+            self.landlord_existing_pv
+        ]
+        # Keep just non-null variables (e.g. the landlord may not provide a uprn)
+        self.keep_variables = [v for v in variables if v is not None]
+        self.rename_map = {
+            self.landlord_property_id: self.STANDARD_LANDLORD_PROPERTY_ID,
+            self.address1_colname: self.STANDARD_ADDRESS_1,
+            self.postcode_colname: self.STANDARD_POSTCODE,
+            self.full_address_colname: self.STANDARD_FULL_ADDRESS,
+            self.landlord_uprn: self.STANDARD_UPRN,
+            self.landlord_property_type: self.STANDARD_PROPERTY_TYPE,
+            self.landlord_year_built: self.STANDARD_YEAR_BUILT,
+            self.landlord_wall_construction: self.STANDARD_WALL_CONSTRUCTION,
+            self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM,
+            self.landlord_existing_pv: self.STANDARD_EXISTING_PV
+        }
+        self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}
+
+        if self.non_intrusives_present:
+            self.keep_variables += self.NON_INTRUSIVES_COLNAMES
+            self.rename_map = {
+                **self.rename_map,
+                **dict(
+                    zip(self.NON_INTRUSIVES_COLNAMES, ["non-intrusives: " + c for c in self.NON_INTRUSIVES_COLNAMES])
+                )
+            }
+
+        # We identify addresses which are likely to be multi-addresses (e.g. are rooms x-y)
+        self.standardised_asset_list["is_multi_address"] = self.standardised_asset_list[
+            self.full_address_colname
+        ].apply(lambda x: self._identify_multi_address(x))
+
+        # We handle cleaning for walls, in the instance that the landlord provides us with EPC data and
+        # we see instances of "average thermal transmittance" in the description
+        if self.landlord_wall_construction is not None:
+            self.standardised_asset_list[self.landlord_wall_construction] = np.where(
+                self.standardised_asset_list[self.landlord_wall_construction].str.lower().str.contains(
+                    "average thermal transmittance", na=False
+                ),
+                "new build - average thermal transmittance",
+                self.standardised_asset_list[self.landlord_wall_construction]
+            )
+
+        # We attempt to process the year built column, if the landlord has provided one
+        if self.landlord_year_built is not None:
+            # We check if we have a datetime - year built has not been renamed
+            if isinstance(self.standardised_asset_list[self.landlord_year_built].iloc[0], datetime):
+                # We treat any string columns - with common values we see
+                self.standardised_asset_list[self.landlord_year_built] = (
+                    self.standardised_asset_list[self.landlord_year_built].replace(self.DATETIME_REMAP)
+                )
+
+                self.standardised_asset_list[self.landlord_year_built] = pd.to_datetime(
+                    self.standardised_asset_list[self.landlord_year_built]
+                )
+                # Convert this to year
+                self.standardised_asset_list[self.landlord_year_built] = (
+                    self.standardised_asset_list[self.landlord_year_built].dt.year
+                )
+            else:
+                # We attempt to convert the year built to a datetime, by detecting the format and converting
+
+                def extract_year(date_str):
+                    """
+                    Extracts the year from a date string in the format '01-Jul-YYYY'.
+                    Returns the extracted year as an integer or None if the format is incorrect.
+                    """
+                    known_errors = ["#MULTIVALUE"]
+
+                    if pd.isnull(date_str) or date_str in known_errors:
+                        return None
+
+                    if isinstance(date_str, str):
+                        match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str)
+                        if match:
+                            return int(match.group(1))  # Extract the year and convert to integer
+
+                    if isinstance(date_str, datetime):
+                        return date_str.year
+
+                    # Check if date_str is a year itself
+                    if str(date_str).isdigit() and len(str(date_str)) == 4:
+                        return int(date_str)
+
+                    raise NotImplementedError("Unhandled format for year built - implement me")
+
+                self.standardised_asset_list[self.landlord_year_built] = self.standardised_asset_list[
+                    self.landlord_year_built
+                ].apply(extract_year)
+
+        # We now create standard lookups
+        to_remap = {
+            self.landlord_property_type: {
+                "standard_values": property_type_mappings.STANDARD_PROPERTY_TYPES,
+                "standard_map": property_type_mappings.PROPERTY_MAPPING
+            },
+            self.landlord_wall_construction: {
+                "standard_values": walls_mappings.STANDARD_WALL_CONSTRUCTIONS,
+                "standard_map": walls_mappings.WALL_CONSTRUCTION_MAPPINGS
+            },
+            self.landlord_heating_system: {
+                "standard_values": heating_mappings.STANDARD_HEATING_SYSTEMS,
+                "standard_map": heating_mappings.HEATING_MAPPINGS
+            },
+            self.landlord_existing_pv: {
+                "standard_values": existing_pv_mappings.STANDARD_EXISTING_PV,
+                "standard_map": existing_pv_mappings.EXISTING_PV_MAPPINGS
+            }
+        }
+        # Keep just entries where the key is not None
+        to_remap = {k: v for k, v in to_remap.items() if k is not None}
+
+        for variable, config in to_remap.items():
+            logger.info("Standardising variable: %s", variable)
+            values_to_remap = self.standardised_asset_list[variable].unique()
+            # We want to map this to our standardised list of property types we're interested in
+            remapper = DataRemapper(standard_values=config["standard_values"], standard_map=config["standard_map"])
+            remap_dictionary = remapper.standardize_list(values_to_remap=values_to_remap.tolist())
+            self.variable_mappings[variable] = remap_dictionary
+
+        # We now print out the variable mappings, which can be reviewed by the user, before the final standardised
+        # asset list is returned
+        for variable, mapping in self.variable_mappings.items():
+            pprint(f"Variable: {variable}")
+            pprint(mapping)
+            # Print a space
+            print("\n")
+            pprint("=======================================")
+
+    def apply_standardiation(self, override_empty_mappings=False):
+        """
+        Apply the mappings built by init_standardise, drop duplicate properties, then select and rename
+        the columns of the standardised asset list.
+
+        :param override_empty_mappings: If true, skips the check that mappings exist. This is only relevant
+        if there are no categories which need remapping which is highly unlikely
+        :return: None; `self.standardised_asset_list` is replaced by the standardised frame
+        :raises ValueError: if there are no variable mappings and the check is not overridden
+        """
+        if not (self.variable_mappings or override_empty_mappings):
+            raise ValueError("Please run init_standardise first")
+
+        logger.info("Applying standardisation to asset list")
+
+        # Remap each variable, keeping the landlord's raw values in a companion column
+        original_columns = []
+        for variable, mapping in self.variable_mappings.items():
+            original_name = variable + "_original_from_landlord"
+            original_columns.append(original_name)
+            self.standardised_asset_list[original_name] = (
+                self.standardised_asset_list[variable].copy()
+            )
+            self.standardised_asset_list[variable] = self.standardised_asset_list[variable].map(mapping)
+
+        # Rows whose property ID repeats an earlier row are recorded, then dropped
+        is_duplicate = self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated()
+        if is_duplicate.sum():
+            pprint(
+                f"There are {is_duplicate.sum()} duplicated "
+                f"addresses - dropping"
+            )
+
+            # Keep a record of duplicates
+            duplicate_columns = [self.DOMNA_PROPERTY_ID, self.address1_colname, self.postcode_colname]
+            self.duplicated_addresses = self.standardised_asset_list[is_duplicate][
+                duplicate_columns
+            ].copy()
+
+            self.standardised_asset_list = self.standardised_asset_list[~is_duplicate]
+
+        # Final variable selection, including the original-value columns, followed by the renaming
+        # to our standard names
+        self.keep_variables += original_columns
+        self.standardised_asset_list = self.standardised_asset_list[self.keep_variables].rename(
+            columns=self.rename_map
+        )
+
+        # Standard columns that the landlord did not provide are created and filled with None
+        standard_columns = [
+            self.STANDARD_EXISTING_PV,
+            self.STANDARD_HEATING_SYSTEM,
+            self.STANDARD_UPRN,
+            self.STANDARD_PROPERTY_TYPE,
+            self.STANDARD_YEAR_BUILT,
+            self.STANDARD_WALL_CONSTRUCTION,
+        ]
+
+        for column in standard_columns:
+            if column not in self.standardised_asset_list.columns:
+                self.standardised_asset_list[column] = None
+
+    def merge_data(self, df: pd.DataFrame):
+        """
+        Left-join the columns of ``df`` onto the standardised asset list, matching rows on the domna property id.
+
+        :param df: Data to attach; must contain the domna property id column, with each id appearing at most once.
+        :raises ValueError: If the id column is missing from ``df`` or holds duplicated ids.
+        :return:
+        """
+        id_column = self.DOMNA_PROPERTY_ID
+
+        if id_column not in df.columns:
+            raise ValueError(f"Dataframe must contain the column {self.DOMNA_PROPERTY_ID}")
+
+        # A repeated id on the right-hand side would duplicate asset rows in the left join
+        if df[id_column].duplicated().any():
+            raise ValueError(f"{self.DOMNA_PROPERTY_ID} contains duplicated IDs")
+
+        self.standardised_asset_list = pd.merge(
+            self.standardised_asset_list, df, how="left", on=id_column
+        )
+
+    def extract_attributes(self):
+        """
+        Derive the attributes that we typically use to identify viable work.
+
+        Each result is written to ``self.standardised_asset_list`` as a column: the solar flag, the estimated
+        number of floors, the estimated perimeter, the heat loss area, the EPC roof insulation thickness, the
+        SAP threshold flag and the EPC inspection year flag. The EPC total floor area and number of habitable
+        rooms columns are cast to float in place, and ``process_age_band`` is called at the end.
+
+        :return:
+        """
+        epc_names = self.EPC_API_DATA_NAMES
+        floor_area_col = epc_names["total-floor-area"]
+        rooms_col = epc_names["number-habitable-rooms"]
+        roof_col = epc_names["roof-description"]
+
+        # Solar is present if either the Find-EPC flag is set or the EPC reports a non-zero photo supply
+        self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR] = (
+            self.standardised_asset_list[self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]] |
+            ~self.standardised_asset_list[epc_names["photo-supply"]].isin(["0.0", 0, None, ""])
+        )
+
+        accepted_epc_property_types = ["House", "Flat", "Bungalow", "Maisonette"]
+
+        def resolve_property_type(row):
+            # The logic here is:
+            # 1) Take the property type provided by the HA themselves
+            # 2) In absence of that, take the EPC property type
+            # 3) Otherwise use None
+            landlord_type = row[self.STANDARD_PROPERTY_TYPE]
+            # The standard column can hold None/NaN when the landlord did not provide it, so only
+            # title-case genuine strings rather than calling .title() blindly
+            if isinstance(landlord_type, str):
+                landlord_type = landlord_type.title()
+                if landlord_type in accepted_epc_property_types:
+                    return landlord_type
+
+            epc_type = row[epc_names["property-type"]]
+            return None if pd.isnull(epc_type) else epc_type
+
+        def resolve_floor_height(row):
+            raw_height = row[epc_names["floor-height"]]
+            # NaN is truthy, so missing values must be tested for explicitly before falling back to 2.5
+            if pd.isnull(raw_height) or not raw_height:
+                return 2.5
+            return float(raw_height)
+
+        self.standardised_asset_list[self.ATTRIBUTE_NUMBER_OF_FLOORS] = self.standardised_asset_list.apply(
+            lambda row: estimate_number_of_floors(property_type=resolve_property_type(row)),
+            axis=1
+        )
+
+        self.standardised_asset_list[floor_area_col] = self.standardised_asset_list[floor_area_col].astype(float)
+
+        # Replace "" with NaN before casting. NaN is used rather than None because Series.replace(..., None)
+        # has meant "pad from the previous row" in some pandas versions
+        self.standardised_asset_list[rooms_col] = (
+            self.standardised_asset_list[rooms_col].replace("", np.nan).astype(float)
+        )
+
+        # Estimate the perimeter from the per-floor area and per-floor room count
+        self.standardised_asset_list[self.ATTRIBUTE_ESTIMATED_PERIMETER] = self.standardised_asset_list.apply(
+            lambda row: estimate_perimeter(
+                floor_area=row[floor_area_col] / row[self.ATTRIBUTE_NUMBER_OF_FLOORS],
+                num_rooms=row[rooms_col] / row[self.ATTRIBUTE_NUMBER_OF_FLOORS],
+            ), axis=1
+        )
+
+        self.standardised_asset_list[self.ATTRIBUTE_HEAT_LOSS_AREA] = self.standardised_asset_list.apply(
+            lambda row: estimate_external_wall_area(
+                num_floors=row[self.ATTRIBUTE_NUMBER_OF_FLOORS],
+                floor_height=resolve_floor_height(row),
+                perimeter=row[self.ATTRIBUTE_ESTIMATED_PERIMETER],
+                built_form=row[epc_names["built-form"]]
+            ),
+            axis=1
+        )
+
+        # Only the roof description is needed here, so apply over that column rather than over whole rows
+        self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = (
+            self.standardised_asset_list[roof_col].apply(
+                lambda description: (
+                    RoofAttributes(description=description).process()["insulation_thickness"]
+                    if not pd.isnull(description) else None
+                )
+            )
+        )
+
+        # We produce some additional fields
+        # 1) Is the SAP rating at or below FILLED_CAVITY_SAP_THRESHOLD
+        self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] = (
+            self.standardised_asset_list[epc_names["current-energy-efficiency"]].astype(float) <=
+            self.FILLED_CAVITY_SAP_THRESHOLD
+        )
+        # 2) Flag anything where the EPC inspection year is before EPC_YEAR_THRESHOLD
+        self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] = (
+            pd.to_datetime(
+                self.standardised_asset_list[epc_names["inspection-date"]]
+            ).dt.year < self.EPC_YEAR_THRESHOLD
+        )
+
+        self.process_age_band()
+
+    def process_age_band(self):
+        """
+        Compare the EPC construction age band of every property with the year built supplied by the landlord.
+
+        Three columns are merged onto ``self.standardised_asset_list``, keyed on the domna property id:
+        ``epc_year_lower_bound`` and ``epc_year_upper_bound`` (None where the band is open on that side or where
+        there is no usable band) and ``does_age_band_match_epc_age_band``, which holds one of:
+
+        - "No EPC Age Band": the band is null or listed in ``Definitions.DATA_ANOMALY_MATCHES``
+        - "No Year Built From Landlord": there is a band but the landlord's year built is null
+        - "EPC Age Band Matches Year Built": the year built falls inside the band
+        - "EPC Age Band is older than Year Built": the band ends before the year built
+        - "EPC Age Band is newer than Year Built": the band starts after the year built
+        - "EPC Age Band is different from Year Built": the band is a single year that differs from the year built
+
+        :return:
+        """
+        id_column = self.DOMNA_PROPERTY_ID
+        band_column = self.EPC_API_DATA_NAMES["construction-age-band"]
+        output_columns = [
+            id_column, "epc_year_lower_bound", "epc_year_upper_bound", "does_age_band_match_epc_age_band"
+        ]
+
+        # Bands that only have a lower bound
+        open_ended_lower_bounds = {
+            "England and Wales: 2007 onwards": 2007,
+            "England and Wales: 2012 onwards": 2012,
+        }
+
+        def classify(age_band, year_built):
+            # Returns (lower bound, upper bound, label) for a band that is neither null nor an anomaly
+            single_year = False
+
+            if age_band in open_ended_lower_bounds:
+                lower, upper = open_ended_lower_bounds[age_band], None
+            elif age_band == "England and Wales: before 1900":
+                lower, upper = None, 1899
+            elif age_band.isdigit():
+                # The band is an exact year
+                lower = upper = int(age_band)
+                single_year = True
+            else:
+                # Otherwise the band looks like "<region>: <lower>-<upper>"
+                lower_text, upper_text = age_band.split(": ")[1].split("-")
+                lower, upper = int(lower_text), int(upper_text)
+
+            # Every kind of band needs the missing-year check; without it a NaN year built would fail
+            # both comparisons below and be reported as a mismatch
+            if pd.isnull(year_built):
+                return lower, upper, "No Year Built From Landlord"
+
+            above_lower = lower is None or year_built >= lower
+            below_upper = upper is None or year_built <= upper
+            if above_lower and below_upper:
+                label = "EPC Age Band Matches Year Built"
+            elif single_year:
+                label = "EPC Age Band is different from Year Built"
+            elif not below_upper:
+                # The band ends before the year built, so the EPC describes an older property
+                label = "EPC Age Band is older than Year Built"
+            else:
+                # The band starts after the year built, so the EPC describes a newer property
+                label = "EPC Age Band is newer than Year Built"
+
+            return lower, upper, label
+
+        records = []
+        # Only three columns are read, so iterate over those rather than building a Series per row
+        for property_id, age_band, year_built in zip(
+            self.standardised_asset_list[id_column],
+            self.standardised_asset_list[band_column],
+            self.standardised_asset_list[self.STANDARD_YEAR_BUILT],
+        ):
+            if pd.isnull(age_band) or age_band in Definitions.DATA_ANOMALY_MATCHES:
+                lower, upper, label = None, None, "No EPC Age Band"
+            else:
+                lower, upper, label = classify(age_band, year_built)
+
+            records.append(
+                {
+                    id_column: property_id,
+                    "epc_year_lower_bound": lower,
+                    "epc_year_upper_bound": upper,
+                    "does_age_band_match_epc_age_band": label
+                }
+            )
+
+        # Naming the columns keeps the frame mergeable even when there are no rows at all
+        processed_age_band = pd.DataFrame(records, columns=output_columns)
+
+        # Join explicitly on the property id rather than on whichever column names happen to overlap
+        self.standardised_asset_list = self.standardised_asset_list.merge(
+            processed_age_band, how="left", on=id_column
+        )
+
+    def identify_worktypes(self, cleaned):
+
+        if not self.non_intrusives_present:
+            raise NotImplementedError("Need to implement the case for non-intrusives")
+
+        # If we have non-intrusives completed, we can use this to identify work types
+
+        if self.non_intrusives_present:
+            ######################################################
+            # Empty cavity:
+            ######################################################
+            # 1) Has been flagged on the non-intrusives as being a cavity wall, empty or partially filled
+            # 2) The age is before 1995
+            # 3) We don't remove anything that has access issues yet
+            self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = (
+                (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
+                (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") &
+                self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) &
+                (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) &
+                (
+                    self.standardised_asset_list[
+                        self.EPC_API_DATA_NAMES["current-energy-efficiency"]
+                    ] <= self.EMPTY_CAVITY_SAP_THRESHOLD
+                )
+            )
+            # Let's also flag work that looks eligible without the SAP filter
+            self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = (
+                (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) &
+                (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") &
+                self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) &
+                (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002)
+            )
+
+            # If non_intrusive_indicates_empty_cavity is True,
+            # set non_intrusive_indicates_empty_cavity_no_sap_filter to False
+            self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = np.where(
+                self.standardised_asset_list["non_intrusive_indicates_empty_cavity"],
+                False,
+                self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"]
+            )
+
+            self.standardised_asset_list["epc_indicates_empty_cavity"] = (
+                self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
+                    self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
+                ) & (
+                    self.standardised_asset_list["epc_year_upper_bound"] <= 1995
+                ) & (
+                    ~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD]
+                ) & (
+                    self.standardised_asset_list[
+                        self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= self.EMPTY_CAVITY_SAP_THRESHOLD
+                )
+            )
+
+            # If the EPC is estimated, we defer to the non-intrusives
+            self.standardised_asset_list["epc_indicates_empty_cavity"] = np.where(
+                (
+                    self.standardised_asset_list["epc_indicates_empty_cavity"] &
+                    ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
+                    self.standardised_asset_list["estimated"]
+                ),
+                False,
+                self.standardised_asset_list["epc_indicates_empty_cavity"]
+            )
+
+            ######################################################
+            # Extraction
+            ######################################################
+
+            # as needing a CIGA check. What is the logic we should be applying here?
+            self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = (
+                (self.standardised_asset_list["non-intrusives: Construction"] == "CAVITY") &
+                (self.standardised_asset_list["non-intrusives: Insulated"].isin(["RETRO DRILLED", "FILLED AT BUILD"])) &
+                (~self.standardised_asset_list['non-intrusives: Material'].isin(
+                    ["GREY LOOSE BEAD", "COMPACTED BEAD", "FIBRE BATT NO CAVITY", "EMPTY NARROW BELOW 30mm"]
+                )
+                 ) & (
+                    self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW]
+                )
+            )
+
+            # Also include work without the SAP filter as optimistic
+            self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = (
+                (self.standardised_asset_list["non-intrusives: Construction"] == "CAVITY") &
+                (self.standardised_asset_list["non-intrusives: Insulated"].isin(["RETRO DRILLED", "FILLED AT BUILD"])) &
+                (~self.standardised_asset_list['non-intrusives: Material'].isin(
+                    ["GREY LOOSE BEAD", "COMPACTED BEAD", "FIBRE BATT NO CAVITY", "EMPTY NARROW BELOW 30mm"]
+                )
+                 )
+            )
+
+            # Adjust
+            self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = np.where(
+                self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"],
+                False,
+                self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"]
+            )
+
+            ######################################################
+            # Solar
+            ######################################################
+            # Criteria:
+            # Check 1: Does the property have a valid heating system?
+            self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] = (
+                self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
+                    ["air source heat pump", "ground source heat pump", "high heat retention storage heaters"]
+                )
+            )
+
+            self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] = (
+                (
+                    self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]]
+                    .str.lower().str.contains("air source heat pump|ground source heat pump")
+                ) | (
+                    self.standardised_asset_list[
+                        self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains(
+                        "electric storage heaters"
+                    ) & (
+                        self.standardised_asset_list[self.EPC_API_DATA_NAMES[
+                            "mainheatcont-description"]] == "Controls for high heat retention storage heaters"
+                    )
+                )
+            )
+
+            # Check 2: Does the property have solar already
+            self.standardised_asset_list["property_has_solar"] = (
+                (self.standardised_asset_list[self.STANDARD_EXISTING_PV] == "already has PV") |
+                (self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF") |
+                (self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR])
+            )
+
+            # Check 3: Does the property meet the fabric condition
+            # Solar PV installs are subject to the minimum insulation requirements which means:
+            # 1) one of the following insulation measures must be installed as part of the same
+            # ECO4 project:
+            # • roof insulation (flat roof, pitched roof, room-in-roof)
+            # • exterior facing wall insulation (cavity wall, solid wall)
+            # • party cavity wall insulation
+            # • floor insulation (solid and underfloor)
+            #
+            # OR
+            #
+            # all measures (except any exempted measure referred to in paragraph 4.28)
+            # listed in paragraph a) must already be installed
+            #
+            # With this in mind, we look for 2 classes
+            # 1) The property is fully insulated apart from the loft (<200mm insulation)
+            # 2) The property is fully insulated
+
+            self.standardised_asset_list["solar_landlord_walls_insulated"] = (
+                self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(
+                    ["filled cavity", "insulated solid brick"]
+                )
+            )
+
+            self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = (
+                self.standardised_asset_list["non-intrusives: Insulated"].isin(
+                    ["EWI", "RETRO DRILLED", "FILLED AT BUILD"]
+                )
+            )
+
+            # TODO: We don't have information about the roof from this landlord
+
+            # We merge on the u-value for average thermal transmittance
+            walls_uvalue_data = pd.DataFrame(cleaned["walls-description"])
+            walls_uvalue_data = walls_uvalue_data[
+                ~pd.isnull(walls_uvalue_data["thermal_transmittance"])
+            ][["original_description", "thermal_transmittance"]].rename(
+                columns={
+                    "original_description": self.EPC_API_DATA_NAMES["walls-description"],
+                    "thermal_transmittance": "walls_u_value"
+                }
+            )
+            self.standardised_asset_list = self.standardised_asset_list.merge(
+                walls_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["walls-description"]
+            )
+
+            self.standardised_asset_list["solar_epc_walls_insulated"] = (
+                (
+                    self.standardised_asset_list[
+                        self.EPC_API_DATA_NAMES[
+                            "walls-description"]].str.lower().str.contains(
+                        "|".join(
+                            self.EPC_INSULATED_WALLS_SUBSTRINGS)
+                    )
+                ) | (
+                    self.standardised_asset_list[
+                        "walls_u_value"].apply(
+                        lambda x: x <= 0.7 if not pd.isnull(x) else False
+                    )
+                )
+            )
+
+            # We merge on the u-value for average thermal transmittance
+            roof_uvalue_data = pd.DataFrame(cleaned["roof-description"])
+            roof_uvalue_data = roof_uvalue_data[
+                ~pd.isnull(roof_uvalue_data["thermal_transmittance"])
+            ][["original_description", "thermal_transmittance"]].rename(
+                columns={
+                    "original_description": self.EPC_API_DATA_NAMES["roof-description"],
+                    "thermal_transmittance": "roof_u_value"
+                }
+            )
+
+            self.standardised_asset_list = self.standardised_asset_list.merge(
+                roof_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
+            )
+
+            # If the u-value of a roof is less than 0.7 we consider it insulated
+            self.standardised_asset_list["solar_epc_roof_insulated"] = (
+                self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]].str.lower().str.contains(
+                    "|".join(self.EPC_INSULATED_ROOF_SUBSTRINGS), regex=False
+                ) | (
+                    self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply(
+                        lambda x: int(x) >= 200 if str(x).isdigit() else False
+                    )
+                ) | (
+                    self.standardised_asset_list["roof_u_value"].apply(
+                        lambda x: x <= 0.7 if not pd.isnull(x) else False
+                    )
+                )
+            )
+
+            self.standardised_asset_list["solar_epc_loft_needs_topup"] = self.standardised_asset_list[
+                self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply(
+                lambda x: int(x) < 200 if str(x).isdigit() else False
+            )
+
+            # TODO: Fill with False - should be temp!
+            self.standardised_asset_list["epc_has_floor_recommendation"] = (
+                self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False)
+            )
+
+            # We merge on the u-value for average thermal transmittance
+            floors_uvalue_data = pd.DataFrame(cleaned["floor-description"])
+            floors_uvalue_data = floors_uvalue_data[
+                ~pd.isnull(floors_uvalue_data["thermal_transmittance"])
+            ][["original_description", "thermal_transmittance"]].rename(
+                columns={
+                    "original_description": self.EPC_API_DATA_NAMES["floor-description"],
+                    "thermal_transmittance": "floor_u_value"
+                }
+            )
+
+            # Merge on
+            self.standardised_asset_list = self.standardised_asset_list.merge(
+                floors_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["floor-description"]
+            )
+
+            # We assume that a U-value of 0.5 or below is indicative of an insulated floor
+            self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] = (
+                (
+                    (
+                        self.standardised_asset_list[self.EPC_API_DATA_NAMES["floor-description"]].str
+                        .lower().str.contains("solid")
+                    ) & (
+                        ~self.standardised_asset_list["epc_has_floor_recommendation"]
+                    ) & (
+                        # We do not utilise estimated EPCs for this method because we will always find that
+                        # "epc_has_floor_recommendation" is False
+                        (self.standardised_asset_list["estimated"] == False)
+                    )
+                ) | (
+                    (
+                        self.standardised_asset_list[
+                            self.EPC_API_DATA_NAMES["floor-description"]].str.lower().str.contains("solid")
+                    ) & (
+                        self.standardised_asset_list[self.EPC_API_DATA_NAMES["floor-description"]].str.lower()
+                        .str.contains(", insulated")
+                    )
+                )
+            )
+
+            # Check for other floor types, insulated
+            self.standardised_asset_list["solar_epc_floor_is_other_insulated"] = (
+                # The floor is suspended and insulated
+                (
+                    (
+                        self.standardised_asset_list[self.EPC_API_DATA_NAMES["floor-description"]].str
+                        .lower().str.contains("suspended")
+                    ) & (
+                        ~self.standardised_asset_list["epc_has_floor_recommendation"]
+                    ) & (
+                        # We do not utilise estimated EPCs for this method because we will always find that
+                        # "epc_has_floor_recommendation" is False
+                        self.standardised_asset_list["estimated"] == False
+                    )
+                ) | (
+                    (
+                        self.standardised_asset_list[
+                            self.EPC_API_DATA_NAMES["floor-description"]
+                        ].str.lower().str.contains("suspended")
+                    ) & (
+                        self.standardised_asset_list[
+                            self.EPC_API_DATA_NAMES["floor-description"]
+                        ].str.lower().str.contains(", insulated")
+                    )
+                ) | (
+                    self.standardised_asset_list["floor_u_value"].apply(
+                        lambda x: x <= 0.5 if not pd.isnull(x) else False
+                    )
+                )
+            )
+
+            # We now put together the criteria:
+            # Flag properties that look eligible for solar, that have solid floors
+            # TODO: We'll need to revise this
+            self.standardised_asset_list["solar_eligible_solid_floor"] = (
+                # Landlord data or EPC data indicates the heating system is appropriate
+                (
+                    self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] |
+                    self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"]
+                ) &
+                # The property doesn't currently have solar
+                ~self.standardised_asset_list["property_has_solar"] &
+                # The walls are insulated
+                (
+                    self.standardised_asset_list["solar_landlord_walls_insulated"] |
+                    self.standardised_asset_list["solar_epc_walls_insulated"] |
+                    self.standardised_asset_list["solar_non_intrusives_walls_insulated"]
+                ) &
+                # Roof is insulated
+                self.standardised_asset_list["solar_epc_roof_insulated"] &
+                self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"]
+            )
+
+            # Solid floor but needs a loft top-up
+            self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"] = (
+                # Landlord data or EPC data indicates the heating system is appropriate
+                (
+                    self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] |
+                    self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"]
+                ) &
+                # The property doesn't currently have solar
+                ~self.standardised_asset_list["property_has_solar"] &
+                # The walls are insulated
+                (
+                    self.standardised_asset_list["solar_landlord_walls_insulated"] |
+                    self.standardised_asset_list["solar_epc_walls_insulated"] |
+                    self.standardised_asset_list["solar_non_intrusives_walls_insulated"]
+                ) &
+                # Roof needs a loft top-up
+                self.standardised_asset_list["solar_epc_loft_needs_topup"] &
+                self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"]
+            )
+
+            # Other floor type, fully insulated
+
+            self.standardised_asset_list["solar_eligible_other_floor"] = (
+                # Landlord data or EPC data indicates the heating system is appropriate
+                (
+                    self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] |
+                    self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"]
+                ) &
+                # The property doesn't currently have solar
+                ~self.standardised_asset_list["property_has_solar"] &
+                # The walls are insulated
+                (
+                    self.standardised_asset_list["solar_landlord_walls_insulated"] |
+                    self.standardised_asset_list["solar_epc_walls_insulated"]
+                ) &
+                # Roof is insulated
+                self.standardised_asset_list["solar_epc_roof_insulated"] &
+                self.standardised_asset_list["solar_epc_floor_is_other_insulated"]
+            )
+
+            # Other floor type, needs loft top-up
+            self.standardised_asset_list["solar_eligible_other_floor_needs_loft"] = (
+                # Landlord data or EPC data indicates the heating system is appropriate
+                (
+                    self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] |
+                    self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"]
+                ) &
+                # The property doesn't currently have solar
+                ~self.standardised_asset_list["property_has_solar"] &
+                # The walls are insulated
+                (
+                    self.standardised_asset_list["solar_landlord_walls_insulated"] |
+                    self.standardised_asset_list["solar_epc_walls_insulated"]
+                ) &
+                # Roof need loft top-up
+                self.standardised_asset_list["solar_epc_loft_needs_topup"] &
+                # Floor is not solid, but is insulated
+                self.standardised_asset_list["solar_epc_floor_is_other_insulated"]
+            )
+
+            # Drop anything we don't need
+            self.standardised_asset_list = self.standardised_asset_list.drop(
+                columns=["walls_u_value", "roof_u_value", "floor_u_value"]
+            )
+
+            # Adjust flagged extraction jobs to remove anything for solar
+            self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = (
+                self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] &
+                ~self.standardised_asset_list["solar_eligible_solid_floor"] &
+                ~self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"]
+                # ~self.standardised_asset_list["solar_eligible_other_floor"] &
+                # ~self.standardised_asset_list["solar_eligible_other_floor_needs_loft"]
+            )
+
+        blocks_of_flats = self.standardised_asset_list[
+            self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats"
+            ]
+
+        non_blocks_of_flats = self.standardised_asset_list[
+            self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats"
+            ]
+
+        # Produce some aggregate figures
+        self.work_type_figures = {
+            # Empty cavity from non-intrusives
+            "Empty Cavity (non-intrusives)": non_blocks_of_flats["non_intrusive_indicates_empty_cavity"].sum(),
+            "Empty Cavity (non-intrusives, blocks of flats)": (
+                blocks_of_flats["non_intrusive_indicates_empty_cavity"].sum()
+            ),
+            "Empty Cavity (non-intrusives, no SAP filter)": (
+                non_blocks_of_flats["non_intrusive_indicates_empty_cavity_no_sap_filter"].sum()
+            ),
+            "Empty Cavity (non-intrusives, no SAP filter, blocks of flats)": (
+                blocks_of_flats["non_intrusive_indicates_empty_cavity_no_sap_filter"].sum()
+            ),
+            "Empty Cavity (EPC)": (
+                (
+                    non_blocks_of_flats["epc_indicates_empty_cavity"] &
+                    ~non_blocks_of_flats["non_intrusive_indicates_empty_cavity"]
+                ).sum()
+            ),
+            "Empty Cavity (EPC, blocks of flat)": (
+                (
+                    blocks_of_flats["epc_indicates_empty_cavity"] &
+                    ~blocks_of_flats["non_intrusive_indicates_empty_cavity"]
+                ).sum()
+            ),
+            "Cavity Extraction": (
+                (
+                    ~non_blocks_of_flats["non_intrusive_indicates_empty_cavity"] &
+                    ~non_blocks_of_flats["epc_indicates_empty_cavity"] &
+                    non_blocks_of_flats["non_intrusive_indicates_cavity_extraction"]
+                ).sum()
+            ),
+            "Cavity Extraction (blocks of flats)": (
+                (
+                    ~blocks_of_flats["non_intrusive_indicates_empty_cavity"] &
+                    ~blocks_of_flats["epc_indicates_empty_cavity"] &
+                    blocks_of_flats["non_intrusive_indicates_cavity_extraction"]
+                ).sum()
+            ),
+            "Cavity Extraction (no SAP filter)": (
+                (
+                    ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
+                    ~self.standardised_asset_list["epc_indicates_empty_cavity"] &
+                    self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"]
+                ).sum()
+            ),
+            "Solar PV (Solid Floor)": (
+                self.standardised_asset_list["solar_eligible_solid_floor"].sum()
+            ),
+            "Solar PV (Solid Floor, Needs Loft Top-up)": (
+                self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"].sum()
+            ),
+            "Solar PV (Other Floor)": (
+                self.standardised_asset_list["solar_eligible_other_floor"].sum()
+            ),
+            "Solar PV (Other Floor, Needs Loft Top-up)": (
+                self.standardised_asset_list["solar_eligible_other_floor_needs_loft"].sum()
+            )
+        }
+
+        # We produce a breakdown of the property types, for cavity fills
+        cavity_fills = self.standardised_asset_list[
+            self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] | (
+                self.standardised_asset_list["epc_indicates_empty_cavity"]
+            )
+            ]
+
+        self.work_type_breakdowns = {
+            "empty_cavity": cavity_fills[self.STANDARD_PROPERTY_TYPE].value_counts()
+        }
+
+        # Finally, we note why each property has been flagged
+        self.standardised_asset_list["cavity_reason"] = None
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            self.standardised_asset_list["non_intrusive_indicates_empty_cavity"],
+            "Non-Intrusive Data Showed Empty Cavity",
+            self.standardised_asset_list["cavity_reason"]
+        )
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"],
+            "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed",
+            self.standardised_asset_list["cavity_reason"]
+        )
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            (
+                self.standardised_asset_list["epc_indicates_empty_cavity"] &
+                ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]
+            ),
+            "EPC Data Showed Empty Cavity",
+            self.standardised_asset_list["cavity_reason"]
+        )
+        # Flag extraction
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            (
+                self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] &
+                pd.isnull(self.standardised_asset_list["cavity_reason"])
+            ),
+            "Non-Intrusive Data Showed Cavity Extraction",
+            self.standardised_asset_list["cavity_reason"]
+        )
+        # extraction no sap filter
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            (
+                self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] &
+                pd.isnull(self.standardised_asset_list["cavity_reason"])
+            ),
+            "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed",
+            self.standardised_asset_list["cavity_reason"]
+        )
+
+        # Flag solar
+        self.standardised_asset_list["solar_reason"] = None
+        self.standardised_asset_list["solar_reason"] = np.where(
+            self.standardised_asset_list["solar_eligible_solid_floor"],
+            "Solid Floor, Insulated, No Solar",
+            self.standardised_asset_list["solar_reason"]
+        )
+        self.standardised_asset_list["solar_reason"] = np.where(
+            self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"],
+            "Solid Floor, Insulated, Needs Loft",
+            self.standardised_asset_list["solar_reason"]
+        )
+        self.standardised_asset_list["solar_reason"] = np.where(
+            self.standardised_asset_list["solar_eligible_other_floor"],
+            "Other Floor, Insulated, No Solar",
+            self.standardised_asset_list["solar_reason"]
+        )
+        self.standardised_asset_list["solar_reason"] = np.where(
+            self.standardised_asset_list["solar_eligible_other_floor_needs_loft"],
+            "Other Floor, Insulated, Needs Loft",
+            self.standardised_asset_list["solar_reason"]
+        )
+
+    def flat_analysis(self):
+        """
+        Build a per-postcode summary of flats and their SAP scores.
+
+        Rows of ``self.standardised_asset_list`` whose standard property type is
+        exactly "flat" are grouped by postcode. For each postcode we record how
+        many flats there are, how many score below
+        ``self.FILLED_CAVITY_SAP_THRESHOLD`` (labelled "below C75" in the output),
+        what percentage that is, and how many score below 69.
+
+        The summary is stored on ``self.flat_data``; nothing is returned. When
+        there are no flats the frame is empty but still carries every column
+        header, so downstream exports keep a stable layout.
+        """
+        # TODO: deduce the building name by stripping out the house number, so
+        # flats can be grouped per building rather than per postcode.
+        postcode_col = self.STANDARD_POSTCODE
+        sap_col = self.EPC_API_DATA_NAMES["current-energy-efficiency"]
+        summary_columns = [
+            "Postcode",
+            "Property Type",
+            "Number of Flats with EPC",
+            "Number of Flats below C75",
+            "Proportion of Flat EPCs below C75",
+            "Number of Flats Below C69",
+        ]
+
+        # We want to deduce if flats have 50% of the properties below C75, so we
+        # keep only the flats and group them by postcode.
+        assets = self.standardised_asset_list
+        flats = assets[assets[self.STANDARD_PROPERTY_TYPE] == "flat"]
+
+        flat_data = []
+        for postcode, group in flats.groupby(postcode_col):
+            sap_scores = group[sap_col]
+            # NOTE(review): this counts every flat row at the postcode, including
+            # rows with a missing SAP score, despite the "with EPC" label - confirm
+            # whether the denominator should be the non-null scores only.
+            num_flats = len(group)
+            num_below_c75 = sap_scores.lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum()
+            # Check if any flats are below C69
+            num_flats_below_c69 = sap_scores.lt(69).sum()
+
+            flat_data.append(
+                {
+                    "Postcode": postcode,
+                    "Property Type": "Flat",
+                    "Number of Flats with EPC": num_flats,
+                    "Number of Flats below C75": num_below_c75,
+                    "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats),
+                    "Number of Flats Below C69": num_flats_below_c69,
+                }
+            )
+
+        # Passing the columns explicitly keeps the headers when flat_data is empty
+        self.flat_data = pd.DataFrame(flat_data, columns=summary_columns)
diff --git a/asset_list/app.py b/asset_list/app.py
new file mode 100644
index 00000000..84999e93
--- /dev/null
+++ b/asset_list/app.py
@@ -0,0 +1,480 @@
+import os
+import time
+import json
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from pprint import pprint
+import msgpack
+from utils.s3 import read_from_s3
+from asset_list.AssetList import AssetList
+from asset_list.mappings.property_type import PROPERTY_MAPPING
+from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS
+from asset_list.mappings.heating_systems import HEATING_MAPPINGS
+from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS
+
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
+
+# Load environment variables from the backend .env file before they are read below.
+# NOTE(review): the path is relative, so this presumably expects the script to be run
+# from the repository root - confirm.
+load_dotenv(dotenv_path="backend/.env")
+# Passed to SearchEpc as auth_token; os.getenv returns None when the variable is unset.
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def _retrieve_find_my_epc_data(newest_epc):
+    """
+    Fetch the newest FindMyEPC record for the property described by ``newest_epc``.
+
+    The lookup is tried with the EPC's "address" first. If that raises a
+    ``ValueError`` mentioning "No EPC found" and the EPC has an "address1" entry,
+    the lookup is retried with "address1".
+
+    :param newest_epc: EPC record (dict-like) holding "address", "postcode" and,
+        optionally, "address1".
+    :return: The FindMyEPC data, or an empty dict when a ``ValueError`` means that
+        nothing could be retrieved.
+    :raises Exception: If the first lookup fails with anything other than a
+        ``ValueError``.
+    """
+    try:
+        return RetrieveFindMyEpc(
+            address=newest_epc["address"], postcode=newest_epc["postcode"]
+        ).retrieve_newest_find_my_epc_data()
+    except ValueError as e:
+        if "No EPC found" not in str(e) or "address1" not in newest_epc:
+            return {}
+    except Exception as e:
+        raise Exception(f"Error retrieving FindMyEPC data: {e}") from e
+
+    # Retry with the first address line only
+    try:
+        return RetrieveFindMyEpc(
+            address=newest_epc["address1"], postcode=newest_epc["postcode"]
+        ).retrieve_newest_find_my_epc_data()
+    except ValueError:
+        # Any ValueError on the retry means "no data". Previously a ValueError
+        # without "No EPC found" left the result unassigned, so the row silently
+        # re-used the previous property's FindMyEPC data.
+        return {}
+
+
+def get_data(
+    df, manual_uprn_map, epc_api_only=False, row_id_name="row_id"
+):
+    """
+    Retrieve EPC data for every property in a standardised asset list.
+
+    Each row is searched via ``SearchEpc``: first with the extracted house number
+    (and UPRN when known), then with a fallback first address line, and finally
+    with the mapped property type set on the Ordnance Survey client. Rows flagged
+    as "block of flats" are never searched.
+
+    :param df: Standardised asset list; must contain the ``AssetList`` standard
+        address, postcode and property type columns plus ``row_id_name``.
+    :param manual_uprn_map: Dictionary of {full_address: uprn} overrides.
+    :param epc_api_only: When True, only the EPC record is returned per property;
+        recommendations and FindMyEPC data are not fetched.
+    :param row_id_name: Name of the column holding the row identifier.
+    :return: Tuple ``(epc_data, errors, no_epc)``: a list of EPC dictionaries, the
+        row ids whose processing raised, and the row ids with no EPC.
+    """
+    uprn_column = AssetList.STANDARD_UPRN
+    fulladdress_column = AssetList.STANDARD_FULL_ADDRESS
+    address1_column = AssetList.STANDARD_ADDRESS_1
+    postcode_column = AssetList.STANDARD_POSTCODE
+
+    # These re-map the standard property types to forms accepted by the EPC api, so we can predict EPCs
+    property_type_map = {
+        "house": "House",
+        "flat": "Flat",
+        "maisonette": "Maisonette",
+        "bungalow": "Bungalow",
+        "block house": "House",
+        "coach house": "House",
+        "bedsit": "Flat"
+    }
+
+    epc_data = []
+    errors = []
+    no_epc = []
+    for _, home in tqdm(df.iterrows(), total=len(df)):
+        try:
+
+            # If we have a block of flats, we cannot retrieve this data
+            if home[AssetList.STANDARD_PROPERTY_TYPE] == "block of flats":
+                no_epc.append(home[row_id_name])
+                continue
+
+            postcode = home[postcode_column]
+            house_number = str(home[address1_column]).strip()
+            full_address = home[fulladdress_column].strip()
+            # Computed once; the result is re-used for the fallback search below
+            extracted_house_no = SearchEpc.get_house_number(address=house_number, postcode=postcode)
+            house_no = house_number if extracted_house_no is None else extracted_house_no
+
+            # A manual override wins over the landlord-supplied UPRN
+            uprn = manual_uprn_map.get(full_address, None)
+            if uprn is None and home.get(uprn_column):
+                uprn = home[uprn_column]
+
+            if pd.isnull(uprn):
+                uprn = None
+
+            property_type = property_type_map.get(home[AssetList.STANDARD_PROPERTY_TYPE], None)
+
+            searcher = SearchEpc(
+                address1=str(house_no),
+                postcode=postcode,
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address,
+                max_retries=5,
+                uprn=uprn
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+
+            # Nothing found and no UPRN to rely on: try again with a different first address line
+            if searcher.newest_epc is None and uprn is None:
+                if extracted_house_no is None:
+                    # Backup: second comma-separated part, else the first word
+                    address_parts = full_address.split(",")
+                    if len(address_parts) > 1:
+                        add1 = address_parts[1].strip()
+                    else:
+                        add1 = full_address.split(" ")[0].strip()
+                else:
+                    add1 = house_number
+
+                searcher = SearchEpc(
+                    address1=add1,
+                    postcode=postcode,
+                    auth_token=EPC_AUTH_TOKEN,
+                    os_api_key="",
+                    property_type=None,
+                    fast=True,
+                    full_address=full_address,
+                    max_retries=5
+                )
+
+                # Check if we have a flat or apartment
+                lowered_house_number = house_number.lower()
+                if any(token in lowered_house_number for token in ("flat", "apartment", "apt")):
+                    searcher.ordnance_survey_client.property_type = "Flat"
+
+                searcher.find_property(skip_os=True)
+
+            # As a final resort, we estimate the EPC
+            if property_type is not None and searcher.newest_epc is None:
+                searcher.ordnance_survey_client.property_type = property_type
+                searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is None:
+                no_epc.append(home[row_id_name])
+                continue
+
+            if epc_api_only:
+                epc_data.append(
+                    {
+                        row_id_name: home[row_id_name],
+                        **searcher.newest_epc.copy()
+                    }
+                )
+                continue
+
+            # Look for EPC recommendations; a failure here is treated as "none"
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:
+                property_recommendations = {"rows": []}
+
+            # Retrieve data from FindMyEPC, then pause briefly between requests
+            find_epc_data = _retrieve_find_my_epc_data(searcher.newest_epc)
+            time.sleep(np.random.uniform(0.1, 1))
+
+            epc_data.append(
+                {
+                    row_id_name: home[row_id_name],
+                    **searcher.newest_epc.copy(),
+                    "recommendations": property_recommendations["rows"],
+                    "find_my_epc_data": find_epc_data,
+                }
+            )
+        except Exception:
+            # Best effort: record the row so the caller can retry it, then back off
+            errors.append(home[row_id_name])
+            time.sleep(5)
+
+    return epc_data, errors, no_epc
+
+
+def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
+    """
+    Add an "address1_extracted" column derived from the full address.
+
+    :param asset_list: DataFrame to update; it is modified in place and returned.
+    :param full_address_col: Name of the column holding the full address.
+    :param postcode_col: Name of the column holding the postcode (only read by the
+        "house_number_extraction" method).
+    :param method: One of "first_two_words", "first_word" or
+        "house_number_extraction".
+    :return: The same DataFrame, with the new column attached.
+    :raises ValueError: If ``method`` is not one of the supported names.
+    """
+    if method not in ("first_two_words", "first_word", "house_number_extraction"):
+        raise ValueError(f"Method {method} not recognized")
+
+    if method == "house_number_extraction":
+        extracted = asset_list.apply(
+            lambda row: SearchEpc.get_house_number(address=row[full_address_col], postcode=row[postcode_col]),
+            axis=1
+        )
+    else:
+        words = asset_list[full_address_col].str.split(" ")
+        if method == "first_two_words":
+            extracted = words.str[:2].str.join(" ")
+        else:
+            extracted = words.str[0]
+
+    asset_list["address1_extracted"] = extracted
+    return asset_list
+
+
+def app():
+    """
+    Pull EPC data for a landlord asset list, flag work types and export to Excel.
+
+    Originally described as pulling EPC data for some properties owned by
+    Livewest; the configuration below currently points at the Colchester asset list.
+
+    Data request contents:
+    Date of last EPC
+    Reason for EPC
+    SAP score on register
+    Property Type
+    Property Area
+    Property Age
+    Any Dimensions (HLP,PW,RH)
+    Property Wall Construction
+    Heating Type
+    Secondary Heating
+    Loft Insulation Depth
+
+    Additional if possible:
+    Heat loss calculations
+    EPC recommendations
+    Property UPRN
+
+    The EPC data is fetched in chunks that are cached as files in a "Chunks"
+    folder next to the source workbook. The result is written to
+    "<workbook name> - Standardised.xlsx" in the same folder.
+    """
+
+    # TODO:
+    # For cavity work:
+    # - Flag any entries that have a different wall type between non-intrusive data against EPC
+    # - Worth double checking entries that have a difference in wall construction
+    # - Look at anything that is flagged as an empty cavity but the EPC data says it’s a filled cavity
+    # - Look at the current EPC scores - Anything that is C75 or above, especially if it’s assumed no insulation
+    # - By postcode, we can try and deduce if all of the addresses are flats and then estimate if 50% of the flats
+    # are less than C75
+    # - Flag anything pre SAP2012
+    # - Flag anything over 5 years old
+    # - Look at year built vs age band
+    #
+    # For Solar:
+    # - Discount any that have solar PV - based on non-intrusives and from the inspections team
+    # - In the heating, discount anything that isn’t ashp, ghsp, hhrs, electric storage - possibly homes with
+    # electric room heaters but it might need to be an EPC E
+    # - Fabric - check the floor, wall and roof:
+    #     - Filled or empty cavity is good
+    #     - Insulated solid/timber/system built is good
+    #     - SCIS/CEG needs solid floors
+    #     - JJC don’t care
+    #     - Anything with a loft 200 or below
+    # - Anything C75 and above won’t qualify
+    # - Insulated loft = 200mm
+    # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
+    # - Or the insulation required is loft/cavity (floors should be solid)
+
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
+    data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
+    sheet_name = "Sheet1"
+    postcode_column = 'Full Address.1'
+    fulladdress_column = "Full Address"
+    address1_column = None
+    address1_method = "first_word"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    landlord_year_built = "Build Date"
+    landlord_os_uprn = None
+    landlord_property_type = "Property Type"
+    landlord_wall_construction = "Wallinsul"
+    landlord_heating_system = "HeatSorc"
+    landlord_existing_pv = None
+    landlord_property_id = "Property Reference"
+
+    # For Westward
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
+    # data_filename = "WESTWARD - completed list..xlsx"
+    # sheet_name = "Sheet1"
+    # postcode_column = "WFT EDIT Postcode"
+    # fulladdress_column = "Address"
+    # address1_column = None
+    # address1_method = "house_number_extraction"
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "Build date"
+    # landlord_os_uprn = "UPRN"
+    # landlord_property_type = "Location type"
+    # landlord_wall_construction = "Wall Construction (EPC)"
+    # landlord_heating_system = "Heat Source"
+    # landlord_existing_pv = "PV (Y/N)"
+    # landlord_property_id = "Place ref"
+
+    # Maps addresses to uprn in problematic cases
+    manual_uprn_map = {}
+
+    asset_list = AssetList(
+        local_filepath=os.path.join(data_folder, data_filename),
+        header=0,
+        sheet_name=sheet_name,
+        address1_colname=address1_column,
+        postcode_colname=postcode_column,
+        landlord_property_id=landlord_property_id,
+        full_address_colname=fulladdress_column,
+        full_address_cols_to_concat=address_cols_to_concat,
+        missing_postcodes_method=missing_postcodes_method,
+        address1_extraction_method=address1_method,
+        landlord_year_built=landlord_year_built,
+        landlord_uprn=landlord_os_uprn,
+        landlord_property_type=landlord_property_type,
+        landlord_wall_construction=landlord_wall_construction,
+        landlord_heating_system=landlord_heating_system,
+        landlord_existing_pv=landlord_existing_pv
+    )
+    asset_list.init_standardise()
+
+    # We produce the new maps, which can be saved for future usage
+    def extend_mapping(base_mapping, landlord_column):
+        # dict.update() returns None, so the copy has to be updated in a separate
+        # step; chaining .copy().update(...) left every new map set to None.
+        extended = base_mapping.copy()
+        if landlord_column:
+            extended.update(asset_list.variable_mappings[landlord_column])
+        return extended
+
+    new_property_type_map = extend_mapping(PROPERTY_MAPPING, asset_list.landlord_property_type)
+    new_wall_map = extend_mapping(WALL_CONSTRUCTION_MAPPINGS, asset_list.landlord_wall_construction)
+    new_heating_map = extend_mapping(HEATING_MAPPINGS, asset_list.landlord_heating_system)
+    new_existing_pv_map = extend_mapping(EXISTING_PV_MAPPINGS, asset_list.landlord_existing_pv)
+
+    asset_list.apply_standardiation()
+
+    ### We retrieve the EPC data
+
+    # We chunk up this data into 5000 rows at a time
+    # Create the chunks directory
+    force_retrieve_data = False
+    skip = None  # Used to skip already completed chunks
+    chunk_size = 5000
+    chunk_filename = "Chunk {i}.csv"
+    download_folder = os.path.join(data_folder, "Chunks")
+    os.makedirs(download_folder, exist_ok=True)
+
+    chunk_indexes = list(range(0, len(asset_list.standardised_asset_list), chunk_size))
+    # A list (not a set) so the chunks are always read back in the same order
+    downloaded_files = [chunk_filename.format(i=i) for i in chunk_indexes]
+
+    # We check if we have files associated to these chunks already and if we do, and we do not want to force the
+    # fetching of the data, we skip. The chunk_indexes guard avoids max() failing on an empty asset list.
+    folder_contents = os.listdir(download_folder)
+    if chunk_indexes and all(x in folder_contents for x in downloaded_files):
+        skip = max(chunk_indexes)
+
+    for i in chunk_indexes:
+        print(f"Processing chunk {i} to {i + chunk_size}")
+        if skip is not None and not force_retrieve_data and i <= skip:
+            continue
+        chunk = asset_list.standardised_asset_list[i:i + chunk_size]
+        epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
+            df=chunk,
+            row_id_name=asset_list.DOMNA_PROPERTY_ID,
+            manual_uprn_map=manual_uprn_map,
+        )
+
+        # We now retry any failed properties and append what we get to the main data
+        chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
+        epc_data_failed, _, _ = get_data(
+            df=chunk_failed,
+            row_id_name=asset_list.DOMNA_PROPERTY_ID,
+            manual_uprn_map=manual_uprn_map,
+            epc_api_only=False
+        )
+        epc_data_chunk.extend(epc_data_failed)
+
+        # Store the chunk locally as a csv
+        pd.DataFrame(epc_data_chunk).to_csv(os.path.join(download_folder, chunk_filename.format(i=i)), index=False)
+        # Store the errors and no-data locally
+        with open(os.path.join(download_folder, f"Chunk {i} errors.json"), "w") as f:
+            json.dump(errors_chunk, f)
+
+        # NOTE(review): this file holds JSON despite its .csv extension; the name is
+        # kept as-is in case other tooling reads it.
+        with open(os.path.join(download_folder, f"Chunk {i} nodata.csv"), "w") as f:
+            json.dump(no_epc_chunk, f)
+
+    # We read in and concatenate the created chunks
+    epc_data = []
+    for file in downloaded_files:
+        csv_data = pd.read_csv(os.path.join(download_folder, file))
+        # We need to convert the recommendations back to a list.
+        # SECURITY NOTE(review): eval executes whatever is stored in the chunk files.
+        # They are written by this script, but consider ast.literal_eval (or storing
+        # JSON) if the files could ever come from elsewhere.
+        csv_data["recommendations"] = csv_data["recommendations"].apply(eval)
+        csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
+        epc_data.append(csv_data)
+
+    # ignore_index gives every row a unique label; without it each chunk restarts at
+    # 0 and the index-based join further down pairs rows from different chunks.
+    epc_df = pd.concat(epc_data, ignore_index=True)
+    epc_df["estimated"] = epc_df["estimated"].fillna(False)
+
+    # We expand out the recommendations
+    recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]]
+
+    unique_recommendations = set()
+    for _, row in recommendations_df.iterrows():
+        unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
+
+    columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
+    transformed_data = []
+    for _, row in recommendations_df.iterrows():
+        # Initialize a dictionary for this row with False for all recommendations
+        row_data = {col: False for col in columns}
+        row_data[asset_list.DOMNA_PROPERTY_ID] = row[asset_list.DOMNA_PROPERTY_ID]
+
+        # Set True for each recommendation present in this row
+        for rec in row["recommendations"]:
+            row_data[rec["improvement-summary-text"]] = True
+
+        transformed_data.append(row_data)
+
+    floor_recommendations = [
+        "Floor insulation (solid floor)", "Floor insulation", "Floor insulation (suspended floor)"
+    ]
+    # reindex (rather than plain selection) so a floor recommendation that no
+    # property has becomes an all-False column instead of raising a KeyError
+    transformed_df = pd.DataFrame(transformed_data).reindex(
+        columns=[asset_list.DOMNA_PROPERTY_ID] + floor_recommendations, fill_value=False
+    )
+
+    transformed_df["epc_has_floor_recommendation"] = (
+        transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
+        transformed_df["Floor insulation (suspended floor)"]
+    )
+
+    # Get the find my epc data. The normalised frame is built from a plain list so it
+    # gets a fresh 0..n-1 index, which lines up with epc_df's index.
+    find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID]].join(
+        pd.json_normalize(epc_df["find_my_epc_data"].tolist())
+    )
+    find_my_epc_data = find_my_epc_data.merge(
+        transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
+        how="left", on=asset_list.DOMNA_PROPERTY_ID
+    )
+
+    # We check if we get the solar pv column:
+    if "Solar photovoltaics" not in find_my_epc_data.columns:
+        find_my_epc_data["Solar photovoltaics"] = False
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
+    ].rename(
+        columns=asset_list.EPC_API_DATA_NAMES
+    )
+
+    find_my_epc_columns = (
+        [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
+    )
+    epc_df = epc_df.merge(
+        find_my_epc_data[find_my_epc_columns].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
+        how="left",
+        on=asset_list.DOMNA_PROPERTY_ID
+    )
+
+    asset_list.merge_data(epc_df)
+
+    asset_list.extract_attributes()
+
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
+    # TODO: We should break out the identification of work types to flag blocks of flats specifically
+    asset_list.identify_worktypes(cleaned)
+
+    pprint(asset_list.work_type_figures)
+
+    asset_list.flat_analysis()
+
+    # Store as an excel
+    output_path = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
+    # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
+
+    with pd.ExcelWriter(output_path) as writer:
+        asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+        asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py
new file mode 100644
index 00000000..06e77bba
--- /dev/null
+++ b/asset_list/mappings/exising_pv.py
@@ -0,0 +1,12 @@
+# Standard categories describing whether a property already has solar PV
+STANDARD_EXISTING_PV = {
+    "already has PV",
+    "no PV",
+    "unknown",
+}
+
+# Raw values seen so far (strings and booleans), mapped to the standard categories
+EXISTING_PV_MAPPINGS = dict(
+    [
+        ("NO", "no PV"),
+        ("YES", "already has PV"),
+        ("no", "no PV"),
+        ("yes", "already has PV"),
+        (True, "already has PV"),
+        (False, "no PV"),
+    ]
+)
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
new file mode 100644
index 00000000..4879efcc
--- /dev/null
+++ b/asset_list/mappings/heating_systems.py
@@ -0,0 +1,67 @@
+import numpy as np
+
+# Standard categories for heating systems (listed alphabetically)
+STANDARD_HEATING_SYSTEMS = {
+    "air source heat pump",
+    "boiler - other fuel",
+    "communal gas boiler",
+    "district heating",
+    "electric boiler",
+    "electric radiators",
+    "electric storage heaters",
+    "gas combi boiler",
+    "gas condensing boiler",
+    "gas condensing combi",
+    "ground source heat pump",
+    "high heat retention storage heaters",
+    "oil boiler",
+    "other",
+    "unknown",
+}
+
+# Raw heating-system values seen so far, mapped to the standard categories.
+# NOTE(review): "Solid Fuel" maps to "other" while "SOLIDFUEL" maps to
+# "boiler - other fuel" - confirm that this difference is intended.
+HEATING_MAPPINGS = {
+    # Descriptive values, as supplied
+    "Combi - GAS": "gas combi boiler",
+    "E7 Storage Heaters": "electric storage heaters",
+    "District heating system": "district heating",
+    "Condensing Boiler - GAS": "gas condensing boiler",
+    "Boiler Oil/other": "oil boiler",
+    "Condensing Combi - Gas": "gas condensing combi",
+    "Air Source Source Heat Pump": "air source heat pump",
+    "Biomass Boiler": "boiler - other fuel",
+    "Ground Source Heat Pump": "ground source heat pump",
+    "Electric Oil filled radiators": "electric radiators",
+    "Solid Fuel": "other",
+    "LPG Boiler": "boiler - other fuel",
+    "Electric Boiler": "electric boiler",
+    "No data": "unknown",
+    "Boiler Communal/Commercial - GAS": "communal gas boiler",
+    "Eco Electric Radiators": "electric radiators",
+    "Gas fire": "other",
+    "Backboiler - Solid fuel": "other",
+    # The same descriptive values, lower-cased
+    "combi - gas": "gas combi boiler",
+    "e7 storage heaters": "electric storage heaters",
+    "district heating system": "district heating",
+    "condensing boiler - gas": "gas condensing boiler",
+    "boiler oil/other": "oil boiler",
+    "condensing combi - gas": "gas condensing combi",
+    "air source source heat pump": "air source heat pump",
+    "biomass boiler": "boiler - other fuel",
+    "ground source heat pump": "ground source heat pump",
+    "electric oil filled radiators": "electric radiators",
+    "solid fuel": "other",
+    "lpg boiler": "boiler - other fuel",
+    "electric boiler": "electric boiler",
+    "no data": "unknown",
+    "boiler communal/commercial - gas": "communal gas boiler",
+    "eco electric radiators": "electric radiators",
+    "gas fire": "other",
+    "backboiler - solid fuel": "other",
+    # Short codes
+    "ASHP": "air source heat pump",
+    "COMMHEAT": "communal gas boiler",
+    "GBB": "gas combi boiler",
+    "GFS": "gas condensing boiler",
+    "GWA": "gas condensing boiler",
+    "GWM": "gas condensing combi",
+    "HDU": "district heating",
+    "OILBLR": "oil boiler",
+    "SOLIDFUEL": "boiler - other fuel",
+    "STORHTR": "electric storage heaters",
+    # Missing values
+    np.nan: "unknown",
+}
diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py
new file mode 100644
index 00000000..2612f058
--- /dev/null
+++ b/asset_list/mappings/property_type.py
@@ -0,0 +1,25 @@
+# The standard categories for property types (listed alphabetically)
+STANDARD_PROPERTY_TYPES = {
+    "bedsit",
+    "block house",
+    "block of flats",
+    "bungalow",
+    "coach house",
+    "flat",
+    "house",
+    "maisonette",
+    "other",
+    "park home",
+    "unknown",
+}
+
+# A basic mapping from commonly seen raw property-type values to the standard values
+PROPERTY_MAPPING = {
+    # Upper-case codes, plus the lower-case variants seen so far
+    "HOUSE": "house",
+    "FLAT": "flat",
+    "MAISONET": "maisonette",
+    "BUNGALOW": "bungalow",
+    "BLKHOUS": "block house",
+    "blkhous": "block house",
+    "BEDSIT": "bedsit",
+    "COACHSE": "coach house",
+    "coachse": "coach house",
+    # Descriptive values
+    "Admin Unit Type": "unknown",
+    "Block": "block of flats",
+    "Bungalow": "bungalow",
+    "Flat": "flat",
+    "House": "house",
+    "Maisonette": "maisonette",
+    "Stairwell": "other",
+}
diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py
new file mode 100644
index 00000000..78d64988
--- /dev/null
+++ b/asset_list/mappings/walls.py
@@ -0,0 +1,92 @@
+STANDARD_WALL_CONSTRUCTIONS = {
+    "uninsulated cavity", "filled cavity", "partial insulated cavity", "cavity unknown insulation",
+    "uninsulated solid brick", "insulated solid brick", "solid brick unknown insulation",
+    "timber frame",
+    "system built", "granite or whinstone", "other", "unknown", "sandstone or limestone",
+    "cob",
+    "new build - average thermal transmittance",
+}
+
+WALL_CONSTRUCTION_MAPPINGS = {
+    "New Build - Average Thermal Transmittance": "new build - average thermal transmittance",
+    'Average thermal transmittance 0.25 W/m?K': 'unknown',
+    'Cavity wall, as built, insulated (assumed)': 'filled cavity',
+    'Average thermal transmittance 0.31 W/m?K': 'unknown',
+    'Cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
+    'Average thermal transmittance 0.30 W/m?K': 'unknown', 'Average thermal transmittance 0.28 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.25 W/m-¦K': 'unknown', 'Average thermal transmittance 0.21 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.20 W/m-¦K': 'unknown', 'Average thermal transmittance 0.29 W/m?K': 'unknown',
+    'Average thermal transmittance 0.16 W/m?K': 'unknown',
+    'Average thermal transmittance 0.27 W/m&#0178;K': 'unknown',
+    'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.18 W/m?K': 'unknown',
+    'Granite or whin, with internal insulation': 'granite or whinstone',
+    "Granite or whinstone, as built, insulated (assumed)": "granite or whinstone",
+    'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown',
+    'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown',
+    'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown',
+    'Average thermal transmittance 0.64 W/m?K': 'unknown', 'Average thermal transmittance 0.61 W/m?K': 'unknown',
+    'Sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
+    'Average thermal transmittance 0.33 W/m?K': 'unknown',
+    'Cavity wall,': "cavity unknown insulation",
+    'Cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
+    'Average thermal transmittance 0.29 W/m-¦K': 'unknown', 'Average thermal transmittance 0.32 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.19 W/m-¦K': 'unknown', 'Average thermal transmittance 0.27 W/m?K': 'unknown',
+    'Average thermal transmittance 0.22 W/m?K': 'unknown', 'Average thermal transmittance 0.38 W/m?K': 'unknown',
+    'Average thermal transmittance 0.26 W/m?K': 'unknown', 'Average thermal transmittance 0.27 W/m-¦K': 'unknown',
+    'Average thermal transmittance 0.18 W/m-¦K': 'unknown', 'Average thermal transmittance = 0.27 W/m?K': 'unknown',
+    'Cavity wall, with external insulation': 'filled cavity', 'Average thermal transmittance 0.21 W/m?K': 'unknown',
+    'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown',
+    'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown',
+    'Cavity wall, with internal insulation': 'filled cavity',
+    'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown',
+    'new build - average thermal transmittance': 'new build - average thermal transmittance',
+    'average thermal transmittance 0.25 w/m?k': 'unknown',
+    'cavity wall, as built, insulated (assumed)': 'filled cavity',
+    'average thermal transmittance 0.31 w/m?k': 'unknown',
+    'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
+    'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown',
+    'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m&#0178;k': 'unknown',
+    'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.18 w/m?k': 'unknown',
+    'granite or whin, with internal insulation': 'granite or whinstone',
+    'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown',
+    'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown',
+    'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown',
+    'average thermal transmittance 0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown',
+    'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
+    'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': "cavity unknown insulation",
+    'cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
+    'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown',
+    'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown',
+    'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown',
+    'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown',
+    'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown',
+    'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown',
+    'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown',
+    'average thermal transmittance 0.28 w/m?k': 'unknown',
+    'Cavity wall, filled cavity': 'filled cavity',
+    'Cavity wall, filled cavity and external insulation': 'filled cavity',
+    'Granite or whinstone, as built, no insulation (assumed)': 'granite or whinstone',
+    'Solid brick, as built, insulated (assumed)': 'insulated solid brick',
+    'Solid brick, as built, no insulation (assumed)': 'uninsulated solid brick',
+    'Solid brick, with external insulation': 'insulated solid brick',
+    'Solid brick, with internal insulation': 'insulated solid brick',
+    'System built, as built, insulated (assumed)': 'system built',
+    'System built, as built, no insulation (assumed)': 'system built',
+    'System built, with external insulation': 'system built',
+    'System built, with internal insulation': 'system built',
+    'Timber frame, as built, insulated (assumed)': 'timber frame',
+    'Timber frame, as built, no insulation (assumed)': 'timber frame',
+    'Timber frame, as built, partial insulation (assumed)': 'timber frame',
+    'Timber frame, with additional insulation': 'timber frame',
+    'CAVITY': 'cavity unknown insulation',
+    'COMB': 'unknown',
+    'NONE': 'unknown',
+    'NOTKNOWN': 'unknown',
+    'SOLID': 'solid brick unknown insulation',
+}
diff --git a/asset_list/requirements.txt b/asset_list/requirements.txt
new file mode 100644
index 00000000..fd43ac64
--- /dev/null
+++ b/asset_list/requirements.txt
@@ -0,0 +1,12 @@
+postal
+pandas
+usaddress
+pydantic-settings==2.6.0
+epc-api-python==1.0.2
+fuzzywuzzy
+boto3
+openpyxl
+openai
+tiktoken
+msgpack
+beautifulsoup4
\ No newline at end of file
diff --git a/asset_list/tests/test_standardisation.py b/asset_list/tests/test_standardisation.py
new file mode 100644
index 00000000..b6d9a391
--- /dev/null
+++ b/asset_list/tests/test_standardisation.py
@@ -0,0 +1,5 @@
+from asset_list.AssetList import AssetList
+
+
+def test_multi_unit_address_flagging():
+    assert AssetList._identify_multi_address('Block (Rooms 1-4), 23 Clifton Hill, Newtown, Exeter, EX1 2DL')
diff --git a/backend/Funding.py b/backend/Funding.py
index f0780c51..2839c7ff 100644
--- a/backend/Funding.py
+++ b/backend/Funding.py
@@ -149,7 +149,8 @@ class Funding:
         :return:
         """
         measure_table = pd.DataFrame([
-            m for m in self.recommendations if m in measures and m["default"]
+            m for m in self.recommendations if
+            ((m["type"] in measures) or (m["measure_type"] in measures)) and m["default"]
         ])
 
         measure_table["post_install_sap"] = measure_table["sap_points"] + self.starting_sap
@@ -180,13 +181,10 @@ class Funding:
         measure_table["cost_minus_funding"] = measure_table["total"] - measure_table["estimated_funding"]
         measure_table["cost_minus_funding_per_sap"] = measure_table["cost_minus_funding"] / measure_table["sap_points"]
         measure_table = measure_table.sort_values(["cost_minus_funding_per_sap", "total"], ascending=[True, False])
-        # Recommend the measure, with estimated funding amount
-        recommended_measure = measure_table.head(1)
 
-        return {
-            "measure_type": recommended_measure["measure_type"],
-            "estimated_funding": recommended_measure["estimated_funding"]
-        }
+        return measure_table[
+            ["type", "measure_type", "Cost Savings", "estimated_funding"]
+        ].rename(columns={"Cost Savings": "project_score"}).to_dict("records")
 
     def sap_to_eco_band(self, sap_points):
         """
diff --git a/backend/Property.py b/backend/Property.py
index a495431f..eaffd54d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -395,6 +395,7 @@ class Property:
                     primary_recommendation_id=rec["recommendation_id"],
                     non_invasive_recommendations=self.non_invasive_recommendations,
                 )
+
                 self.recommendations_scoring_data.append(scoring_dict)
 
                 simulation_epc = self.epc_record.prepared_epc.copy()
@@ -1258,6 +1259,12 @@ class Property:
         if (self.building_id is not None) and (self.solar_panel_configuration is not None):
             return True
 
+        # If the property is in a conservation area, is listed or is a heritage building, solar panels
+        # are generally difficult to get through planning restrictions, so we do not recommend
+        # solar panels
+        if self.restricted_measures:
+            return False
+
         is_valid_property_type = self.data["property-type"] in ["House", "Bungalow", "Maisonette"]
         is_valid_roof_type = (
             self.roof["is_flat"] or self.roof["is_pitched"] or self.roof["is_roof_room"]
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index c74a0b1f..0d921bec 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -208,9 +208,14 @@ class SearchEpc:
         try:
             # Updated regex to catch house numbers including alphanumeric ones
             pattern = r'(?i)(?:flat|apartment)\s*(\d+\w*)|^\s*(\d+\w*)'
-            match = re.search(pattern, address)
-            if match:
-                return next(g for g in match.groups() if g is not None)
+            match1 = re.search(pattern, address)
+            if match1:
+                return next(g for g in match1.groups() if g is not None)
+
+            pattern2 = r'(?i)(flat|apartment)\s*([a-zA-Z]?\d+[a-zA-Z]?)'
+            match2 = re.search(pattern2, address)
+            if match2:
+                return match2.group(2)
 
             parsed = usaddress.parse(address)
             # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
@@ -221,7 +226,8 @@ class SearchEpc:
                             continue
                         if part == postcode.split(" ")[1]:
                             continue
-                    return part  # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
+                    return part.rstrip(
+                        ",")  # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
                     # number
 
             # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found
@@ -331,6 +337,9 @@ class SearchEpc:
             if row["lmk-key"] not in seen and not seen.add(row["lmk-key"])
         ]
 
+        if data["rows"]:
+            api_response["msg"] = self.SUCCESS
+
         return api_response["msg"]
 
     def filter_rows(self, rows, property_type=None, address=None):
diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py
index 841ec2c1..8d0c05be 100644
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@@ -54,4 +54,5 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "Gas instantaneous at point of use": {"fuel": "Natural Gas", "cop": 0.85},
     "Room heaters, wood logs": {"fuel": "Wood Logs", "cop": 1},
     "Boiler and radiators, coal": {"fuel": "Coal", "cop": 0.85},
+    "From main system, no cylinderstat": {"fuel": "Natural Gas", "cop": 0.85},
 }
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 04a2ef7f..d82e774b 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -338,7 +338,7 @@ def extract_property_request_data(
 
     # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN
     # we need to check existence of uprn
-    has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else True
+    has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False
     if has_uprn:
         has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None]
 
@@ -370,7 +370,7 @@ def extract_property_request_data(
         property_non_invasive_recommendations["recommendations"] = str(transformed)
 
     # Check if the valuation data has uprn
-    valuation_has_uprn = "uprn" in valuation_data[0] if valuation_data else True
+    valuation_has_uprn = "uprn" in valuation_data[0] if valuation_data else False
     if valuation_has_uprn:
         valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None]
 
@@ -639,8 +639,10 @@ async def trigger_plan(body: PlanTriggerRequest):
         recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
 
         recommendations_scoring_data = recommendations_scoring_data.drop(
-            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
-                     "carbon_ending"]
+            columns=[
+                "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                "carbon_ending"
+            ]
         )
 
         all_predictions = await model_api.async_paginated_predictions(
@@ -692,7 +694,8 @@ async def trigger_plan(body: PlanTriggerRequest):
                 Recommendations.calculate_recommendation_tenant_savings(
                     property_instance=property_instance,
                     kwh_simulation_predictions=kwh_simulation_predictions,
-                    property_recommendations=property_recommendations
+                    property_recommendations=property_recommendations,
+                    ashp_cop=body.ashp_cop
                 )
             )
             property_instance.current_energy_bill = property_current_energy_bill
@@ -822,7 +825,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 property_recommendations=recommendations[p.id],
                 project_scores_matrix=eco_project_scores_matrix,
                 whlg_eligible_postcodes=whlg_eligible_postcodes,
-                gbis_abs_rate=20,
+                gbis_abs_rate=15,
                 eco4_abs_rate=15,
             )
             funding_calulator.check_eligibiltiy()
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index f84912fe..618bec90 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -80,3 +80,5 @@ class PlanTriggerRequest(BaseModel):
     multi_plan: Optional[bool] = False
     optimise: Optional[bool] = True
     default_u_values: Optional[bool] = True
+
+    ashp_cop: Optional[float] = 2.8
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 720005d3..6d4852b2 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -1,5 +1,4 @@
 import numpy as np
-from scipy.constants import value
 
 
 class PropertyValuation:
@@ -216,6 +215,30 @@ class PropertyValuation:
             cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)
         )
 
+        current_epc = property_instance.data["current-energy-rating"]
+
+        if not current_value:
+            return {
+                "current_value": 0,
+                "lower_bound_increased_value": 0,
+                "upper_bound_increased_value": 0,
+                "average_increased_value": 0,
+                "average_increase": 0
+            }
+
+        return cls.estimate_valuation_improvement(current_value, current_epc, target_epc, total_cost)
+
+    @classmethod
+    def estimate_valuation_improvement(cls, current_value, current_epc, target_epc, total_cost=None):
+        """
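+        Estimate the valuation improvement of a property moving from its current EPC rating to a target EPC rating
+        :param current_value: Current value of the property; a falsy value (None or 0) returns an all-zero result
+        :param current_epc: Current EPC band; looked up in cls.EPC_BANDS
+        :param target_epc: Target EPC band; looked up in cls.EPC_BANDS
+        :param total_cost: Optional cost figure (defaults to None)
+        :return: Dict of valuation figures; every figure is 0 when current_value is falsy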
+        """
+
         if not current_value:
             return {
                 "current_value": 0,
@@ -225,7 +248,6 @@ class PropertyValuation:
                 "average_increase": 0
             }
 
-        current_epc = property_instance.data["current-energy-rating"]
         # We get the spectrum of ratings between the current and target EPC
         epc_band_range = cls.EPC_BANDS[cls.EPC_BANDS.index(current_epc): cls.EPC_BANDS.index(target_epc) + 1]
 
diff --git a/backend/tests/test_search_epc.py b/backend/tests/test_search_epc.py
index 3b2e2a5b..562585ad 100644
--- a/backend/tests/test_search_epc.py
+++ b/backend/tests/test_search_epc.py
@@ -48,3 +48,12 @@ class TestSearchEpcIntegration:
         assert epc_searcher.newest_epc["lmk-key"] == lmk_key
         assert epc_searcher.newest_epc["uprn"] == uprn
         assert len(epc_searcher.older_epcs) == n_old_epcs
+
+    def test_search_housenumber(self):
+        # Flat identifiers with a leading letter must come back whole, with or without a postcode
+        cases = {
+            'Flat A11, Mortimer House, Grendon Road, Exeter': "A11",
+            'Flat A9, Mortimer House, Grendon Road, Exeter, EX1 2NL': "A9",
+        }
+        for address, expected in cases.items():
+            assert SearchEpc.get_house_number(address, None) == expected
diff --git a/etl/customers/l_and_g/ic_slides.py b/etl/customers/l_and_g/ic_slides.py
index 72dfc2c0..a5cb3511 100644
--- a/etl/customers/l_and_g/ic_slides.py
+++ b/etl/customers/l_and_g/ic_slides.py
@@ -132,7 +132,7 @@ def get_data(portfolio_id, scenario_ids):
     return properties_data, plans_data, recommendations_data
 
 
-properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[199])
+properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
 
 properties_df = pd.DataFrame(properties_data)
 plans_df = pd.DataFrame(plans_data)
@@ -240,4 +240,7 @@ df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
 df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
 df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
 
+df["Recommendation: Air Source Heat Pump"].sum()
+df["Cost: Air Source Heat Pump"].sum()
+
 df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)
diff --git a/etl/customers/lambeth/re-knocks.py b/etl/customers/lambeth/re-knocks.py
new file mode 100644
index 00000000..1de91b50
--- /dev/null
+++ b/etl/customers/lambeth/re-knocks.py
@@ -0,0 +1,23 @@
+import pandas as pd
+
+data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Downloads/Lambeth Reknocks.xlsx", sheet_name="Possible Route",
+    header=1
+)
+
+data["Outcomes"].value_counts()
+
+# Strip out: No
+
+df = data[data["Outcomes"] == "See notes"]
+notes_df = df[
+    ("Notes (If 'no answer' under outcomes, have you checked around the property for access issues where "
+     "possible?)")].value_counts().to_frame()
+
+example = df[df["Notes (If 'no answer' under outcomes, have you checked around the property for access issues where "
+                "possible?)"] == ('Access to rear of property only through number 10. Overgrown athe rear of property '
+                                  'installer wont be able to access')
+             ]
+
+# 18 did not attend
+#
diff --git a/etl/customers/panacap/assets.py b/etl/customers/panacap/assets.py
new file mode 100644
index 00000000..ec57d9a4
--- /dev/null
+++ b/etl/customers/panacap/assets.py
@@ -0,0 +1,61 @@
+import os
+
+import pandas as pd
+from dotenv import load_dotenv
+
+from etl.spatial.OpenUprnClient import OpenUprnClient
+from etl.route_march_data_pull.app import get_data
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+addresses = [
+    {"address": "3 Willis Road", "postcode": "CB1 2AQ"},
+    {"address": "22 Catharine Street", "postcode": "CB1 3AW"},
+    {"address": "332 Mill Road", "postcode": "CB1 3NN"},
+    {"address": "330 Mill Road", "postcode": "CB1 3NN"},
+    {"address": "328 Mill Road", "postcode": "CB1 3NN"},
+    {"address": "71 Mill Road", "postcode": "CB1 2AS"},
+    {"address": "78 Argyle Street", "postcode": "CB1 3LZ"},
+    {"address": "9 Graham Road", "postcode": "CB4 2ZE"},
+    {"address": "217 Mill Road", "postcode": "CB1 3BE"},
+    {"address": "374 Mill Road", "postcode": "CB1 3NN"},
+    {"address": "174 Thoday Street", "postcode": "CB1 3AX"},
+    {"address": "37 Abbey Road", "postcode": "CB5 8HH"},
+    {"address": "18 Upper Gwydir Street", "postcode": "CB1 2LR"},
+    {"address": "21 Fulbourn Road Fulbourn", "postcode": "CB1 9JL"},
+    {"address": "108 Argyle Street", "postcode": "CB1 3LS"},
+    {"address": "115 Victoria Road", "postcode": "CB4 3BS"},
+    {"address": "55 Ross Street", "postcode": "CB1 3BP"},
+    {"address": "16 Kingston Street", "postcode": "CB1 2NU"},
+    {"address": "13 Thoday Street", "postcode": "CB1 3AS"},
+    {"address": "103 York Street", "postcode": "CB1 2PZ"},
+]
+
+asset_list = pd.DataFrame(addresses)
+asset_list["row_id"] = asset_list.index
+
+epc_data, _, _ = get_data(
+    asset_list=asset_list, fulladdress_column="address", postcode_column="postcode", address1_column="address",
+    manual_uprn_map={}, epc_api_only=True
+)
+
+epc_df = pd.DataFrame(epc_data)
+epc_df.shape
+
+asset_list = asset_list.merge(
+    epc_df, how="left", on="row_id"
+)
+
+asset_list = asset_list.rename(columns={"address_x": "Address", "postcode_x": "Postcode"})
+asset_list["uprn"] = asset_list["uprn"].astype(str)
+
+spatial_data = OpenUprnClient.get_spatial_data([x["uprn"] for x in epc_data], bucket_name="retrofit-data-dev")
+spatial_data["UPRN"] = spatial_data["UPRN"].astype(str)
+
+asset_list = asset_list.merge(
+    spatial_data, how="left", left_on="uprn", right_on="UPRN"
+)
+
+asset_list.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Panacap/Acquisitions EPC Data.csv",
+                  index=False)
diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py
index 13cdc41b..fc3b7ec6 100644
--- a/etl/customers/remote_assessments/app.py
+++ b/etl/customers/remote_assessments/app.py
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
 from utils.s3 import save_csv_to_s3
 from etl.find_my_epc.AssetListEpcData import AssetListEpcData
 
-PORTFOLIO_ID = 126
+PORTFOLIO_ID = 134
 USER_ID = 8
 
 load_dotenv(dotenv_path="backend/.env")
@@ -19,22 +19,24 @@ def app():
 
     asset_list = [
         {
-            "address": "Garden Flat, 48 Bedminster Parade",
-            "postcode": "BS3 4HS",
-            "building_id": 1,
-            "uprn": 308249,
+            "address": "Flat 2, 42 Malden Road, London NW5 3HG",
+            "postcode": "NW5 3HG",
+            "uprn": 5117165,
         },
         {
-            "address": "Top Floor Flat, 48 Bedminster Parade",
-            "postcode": "BS3 4HS",
-            "building_id": 1,
-            "uprn": 308251
+            "address": "15 Bournville Lane",
+            "postcode": "B30 2JY",
+            "uprn": 100070301128
         },
         {
-            "address": "First Floor Flat, 48 Bedminster Parade",
-            "postcode": "BS3 4HS",
-            "building_id": 1,
-            "uprn": 308250,
+            "address": "34 Bournville Lane",
+            "postcode": "B30 2LN",
+            "uprn": 100070301140
+        },
+        {
+            "address": "36 Bournville Lane",
+            "postcode": "B30 2LN",
+            "uprn": 100070301142
         }
     ]
     asset_list = pd.DataFrame(asset_list)
@@ -65,20 +67,21 @@ def app():
 
     valuation_data = [
         {
-            "address": "Garden Flat, 48 Bedminster Parade",
-            "postcode": "BS3 4HS",
-            "valuation": 337_000
+            "uprn": 5117165,
+            "valuation": 467_000
         },
         {
-            "addresss": "Top Floor Flat, 48 Bedminster Parade",
-            "postcode": "BS3 4HS",
-            "valuation": 337_000
+            "uprn": 100070301128,
+            "valuation": 335_000
         },
         {
-            "address": "First Floor Flat, 48 Bedminster Parade",
-            "postcode": "BS3 4HS",
-            "valuation": 337_000
-        }
+            "uprn": 100070301140,
+            "valuation": 276_000
+        },
+        {
+            "uprn": 100070301142,
+            "valuation": 276_000
+        },
     ]
     # Store valuation data to s3
     valuation_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuation.csv"
diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py
index 8538188b..95fe4fcd 100644
--- a/etl/customers/stonewater/Wave 3 Preparation.py	
+++ b/etl/customers/stonewater/Wave 3 Preparation.py	
@@ -1,4 +1,7 @@
 import os
+from urllib import parse
+from fuzzywuzzy import fuzz
+
 import PyPDF2
 import re
 import pandas as pd
@@ -128,6 +131,7 @@ def extract_summary_report(pdf_path):
         "Current SAP Rating": None,
         "Current EPC Band": None,
         "Fuel Bill": None,
+        "Main Building Age Band": None,
         "Number of Storeys": None,
         "Window Age Description": None,
         "Window Age Description Proportion (%)": None,
@@ -177,6 +181,13 @@ def extract_summary_report(pdf_path):
         sap_match = re.search(r"Current SAP rating:\s*([A-Z] \d+)", text)
         data["Current SAP Rating"] = sap_match.group(1).split(" ")[1]
 
+        # Extract age; the field keeps its None default when the report has no matching "Date Built" line
+        age_band_match = re.search(
+            r"3\.0 Date Built:\s*Main Property\s*[A-Z]?\s*(\d{4}-\d{4}|before \d{4}|\d{4} onwards)",
+            text
+        )
+        data["Main Building Age Band"] = age_band_match.group(1) if age_band_match else None
+
         # Number of storeys
         storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
         data["Number of Storeys"] = int(storeys_match.group(1))
@@ -465,7 +476,11 @@ def extract_building_parts_summary(text):
         r"Dimensions:\s*Dimension type: Internal\n(.*?)\n5\.0 Conservatory:", text, re.DOTALL
     )
     if not dimensions_section:
-        raise ValueError("Failed to locate dimensions section in the text.")
+        dimensions_section = re.search(
+            r"Dimensions:\s*Dimension type: External\n(.*?)\n5\.0 Conservatory:", text, re.DOTALL
+        )
+        if not dimensions_section:
+            raise ValueError("Failed to locate dimensions section in the text.")
 
     dimensions_text = dimensions_section.group(1)
 
@@ -694,6 +709,7 @@ def extract_epr(pdf_path):
         "Primary Energy Use (kWh/yr)": None,
         "Primary Energy Use Intensity (kWh/m2/yr)": None,
         "Number of Storeys": None,
+        "Main Building Age Band": None,
         "Fuel Bill": None,
         "Window Age Description": None,
         "Window Age Description Proportion (%)": None,
@@ -747,12 +763,38 @@ def extract_epr(pdf_path):
 
         # Extract Current and Potential SAP ratings
         sap_match = re.search(r"GG \(1-20\)\s*(\d{1,2})\s*(\d{1,2})", text)
-        current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2))
-        data["Current SAP Rating"] = current_sap
+        if sap_match is None:
+            # Handles the older format of the elmhurst EPR
+            # The text will look something like this:
+            # Least energy efficient - higher running costsD 61 - we extract D 61
+            sap_match = re.search(
+                r"(?P<current_epc>[A-G])\s(?P<current_sap>\d{1,3})(?P<potential_epc>[A-G])\s(?P<potential_sap>\d{1,3})",
+                text)
+            data["Current EPC Band"] = sap_match.group("current_epc")
+            data["Current SAP Rating"] = int(sap_match.group("current_sap"))
+        else:
+            current_sap, _ = int(sap_match.group(1)), int(sap_match.group(2))
+            data["Current SAP Rating"] = current_sap
 
         # Extract the primary energy use intensity
         additional_rating_match = re.search(r"Additional ratings for your home\s*([\d.]+)", text)
-        data["Primary Energy Use Intensity (kWh/m2/yr)"] = float(additional_rating_match.group(1))
+        if additional_rating_match:
+            data["Primary Energy Use Intensity (kWh/m2/yr)"] = float(additional_rating_match.group(1))
+        else:
+            # Handles the older format of the Elmhurst EPR
+            primary_energy_match = re.search(r"actual consumption\.\n(?P<primary_energy>\d+)", text)
+            data["Primary Energy Use (kWh/yr)"] = int(primary_energy_match.group("primary_energy"))
+            # We calculate the primary energy use intensity by dividing by floor area
+            floor_area = re.search(r"Total Floor Area\s(?P<floor_area>\d+)\s?m2", text).group("floor_area")
+            data["Primary Energy Use Intensity (kWh/m2/yr)"] = data["Primary Energy Use (kWh/yr)"] / int(floor_area)
+
+        # Extract age band; the field keeps its None default when the pattern is not found
+        age_band_match = re.search(
+            r"Building part:\s*Main\s*-\s*built in\s*(?:[A-Z]\s*)?(\d{4}-\d{4}|before \d{4}|\d{4} onwards)",
+            text
+        )
+        if age_band_match:
+            data["Main Building Age Band"] = age_band_match.group(1)
 
         # Extract Number of Storeys
         storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
@@ -880,11 +922,18 @@ def detect_report_type(pdf_path, pdf_file):
     """
     # Attempt to read the first page of the PDF to determine type
     with open(pdf_path, "rb") as file:
+        # This code raises some warnings like Multiple definitions in dictionary at byte 0x1ab for key /Filter
+        # This is because the pdf is irregular. We could possibly try a library like fitz to handle this
         reader = PyPDF2.PdfReader(file)
         first_page_text = reader.pages[0].extract_text() if reader.pages else ""
+        n_pages = len(reader.pages)
 
-        if is_energy_report(first_page_text):
+        if is_energy_report(first_page_text) and n_pages > 3:
+            # The EPR should have more than 3 pages
             return "epr"
+        elif is_energy_report(first_page_text) and n_pages <= 3:
+            # This is a shortened version of the EPR which isn't massively useful
+            return "short_form_epr"
         elif "summary" in pdf_file.lower() or is_summary_report(first_page_text):
             return "summary"
         elif is_condition_report(first_page_text):
@@ -1675,7 +1724,6 @@ def append_stonewater_id():
     )
     model_proposed_sample = model_proposed_sample[~pd.isnull(model_proposed_sample["Address ID"])]
     model_proposed_sample["Address ID"] = model_proposed_sample["Address ID"].astype(int)
-    z = model_proposed_sample["Archetype ID"].drop_duplicates().sort_values()
 
     original_archetypes = pd.read_excel(
         "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
@@ -2906,6 +2954,14 @@ def identify_incorrect_packages():
     )
 
 
+def extract_sharepoint_url(x):
+    """Return the last two URL path segments, percent-decoded, of a "<label> - http..." cell ("" if null)."""
+    if pd.isnull(x):
+        return ""
+    url_path = parse.urlparse(x.split(" - http")[1]).path  # IndexError if the " - http" marker is absent
+    return "/".join(parse.unquote(part) for part in url_path.split("/")[-2:])  # decodes every %XX, not only %20
+
+
 def revised_model():
     """
     This function implements the revised model for Stonewater, where we are looking at new priority postcodes
@@ -2913,7 +2969,6 @@ def revised_model():
     """
 
     # 1) Create the new list of properties
-
     new_priority_postcodes = pd.read_excel(
         "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 "
         "priority list.xlsx"
@@ -2927,16 +2982,1312 @@ def revised_model():
     original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
     original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
     original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
+    original_archetypes["UPRN"] = original_archetypes["UPRN"].astype("Int64").astype(str)
 
-    original_archetypes = original_archetypes[
-        ["Address ID", "Archetype ID", ""]
-    ]
+    wave_21_folder_name = "Wave 2.1 Surveys - 2"
 
     # Check if we have all of the addresses
     missed = original_archetypes[
         ~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values)
     ]["Archetype ID"].unique()
-    assert
+
+    assert set(missed) == {'NOT PRIORITY POSTCODE', 'IN WAVE 2.1', 'EPC C OR ABOVE'}
+
+    original_archetypes = original_archetypes[
+        ["Address ID", "Archetype ID", "Archetype Group Rank", "UPRN"]
+    ]
+
+    # Merge these archetypes on to the new priority postcodes
+    new_priority_postcodes = new_priority_postcodes.merge(
+        original_archetypes, how="left", on="Address ID"
+    )
+
+    # Basic check: no row with a missing Archetype ID should have an Address ID present in original_archetypes
+    assert float(new_priority_postcodes[pd.isnull(new_priority_postcodes["Archetype ID"])]["Address ID"].isin(
+        original_archetypes["Address ID"]
+    ).sum()) == 0
+
+    # We pull together the survey data sheet
+    survey_folders = []
+
+    # Loop over each survey folder and list its contents
+    for i in range(1, NUM_FOLDERS + 1):
+        folder_path = os.path.join(CUSTOMER_FOLDER_PATH, f"StonewaterSurveys_{i}")
+        if os.path.isdir(folder_path):  # Check if folder exists
+            folder_contents = [os.path.join(f"StonewaterSurveys_{i}", file) for file in os.listdir(folder_path)]
+            survey_folders.extend(folder_contents)  # Append contents to the master list
+
+    wave_21_folders = [
+        "1. Herefordshire",
+        "2. Bedfordshire",
+        "3. Wiltshire",
+        "4. Bournemouth",
+        "5. Coventry",
+        "6. West Sussex",
+        "7. Dorset",
+        "8. Cambridgeshire",
+        "9. Guildford",
+        "10. Little Island",
+        "11. CCS Dorset"
+    ]
+
+    for wave_2_1_folder in wave_21_folders:
+        folder_path = os.path.join(CUSTOMER_FOLDER_PATH, wave_21_folder_name, wave_2_1_folder)
+        if os.path.isdir(folder_path):  # Check if folder exists
+            folder_contents = [os.path.join(wave_21_folder_name, wave_2_1_folder, file) for file in
+                               os.listdir(folder_path)]
+            survey_folders.extend(folder_contents)  # Append contents to the master list
+
+    # We now do a large pull of all of the data
+    extracted_data = []
+    mtp_extracted_data = []  # Additional data to extract from the medium term plans
+    for survey_folder in tqdm(survey_folders):
+        survey_folder_path = os.path.join(CUSTOMER_FOLDER_PATH, survey_folder)
+
+        # Check that the survey folder is actually a folder
+        if not os.path.isdir(survey_folder_path):
+            continue
+
+        # List the folders inside of the survey folder
+        survey_subfolders = [
+            name for name in os.listdir(survey_folder_path)
+            if os.path.isdir(os.path.join(survey_folder_path, name))
+        ]
+
+        # Check if there's a "retrofit assessment" folder
+        retrofit_folder = next((name for name in survey_subfolders if "retrofit assessment" in name.lower()), None)
+
+        ra_folder = next(
+            (name for name in survey_subfolders if "ra coordinator info" in name.lower() or "ra info" in name.lower()),
+            None
+        )
+
+        mtp_folder = next(
+            (name for name in survey_subfolders if "mid-term" in name.lower() or "mtp" in name.lower()),
+            None
+        )
+        if mtp_folder:
+            # We have a mid term plan:
+            mtp_folder_path = os.path.join(survey_folder_path, mtp_folder)
+            # Get the contents - files and not folder
+            mtp_contents = [
+                os.path.join(mtp_folder, file) for file in os.listdir(mtp_folder_path)
+                if ".DS_Store" not in file and not os.path.isdir(os.path.join(mtp_folder_path, mtp_folder, file))
+            ]
+
+            has_v1 = [
+                f for f in mtp_contents if "v1" in f.lower() or "/ss" in f.lower()
+            ]
+
+            if has_v1:
+                # Then we go one level deeper
+                mtp_contents = [
+                    os.path.join(has_v1[0], f) for f in
+                    os.listdir(os.path.join(survey_folder_path, has_v1[0]))
+                ]
+
+            # We check each file to see whether it is the IMA
+            for file_name in mtp_contents:
+
+                filepath = os.path.join(survey_folder_path, file_name)
+                # We expect a pdf so try and parse it
+                try:
+                    with open(filepath, "rb") as file:
+                        reader = PyPDF2.PdfReader(file)
+                        # Just the first page
+                        text = reader.pages[0].extract_text()
+
+                except Exception as e:
+                    continue
+
+                # We check if this is an IMA
+                ima_heading_search = re.search(
+                    r"Improvement measure\s+Capital Cost\s+Lifetime of\s*\n\s*measureFuel saving\s*Lifetime fuel", text
+                )
+
+                is_ima = bool(ima_heading_search)
+                if not is_ima:
+                    continue
+
+                # Otherwise, extract: RIR, PV
+                pv_search = re.search(r"PV \(\d+Kwp\)", text)
+                has_pv = bool(pv_search)
+                pv_system = pv_search.group(0) if has_pv else None
+
+                # We perform a second search for PV:
+                if pv_search is None:
+                    pv_search = re.search("solar pv", text.lower())
+                    has_pv = bool(pv_search)
+                    pv_system = "Solar PV" if has_pv else None
+
+                rir_search = re.search(r"RIR \(\d+(\.\d+)?\)", text)
+                has_rir = bool(rir_search)
+                rir_spec = rir_search.group(0) if has_rir else None
+
+                mtp_extracted_data.append({
+                    "survey_folder": survey_folder,
+                    "has_pv": has_pv,
+                    "PV System": pv_system,
+                    "RIR Specification": rir_spec,
+                    "has_rir": has_rir
+                })
+                continue
+
+        # If retrofit assessment folder exists, check if it has content
+        if retrofit_folder or ra_folder:
+            if retrofit_folder:
+                retrofit_folder_path = os.path.join(survey_folder_path, retrofit_folder)
+            else:
+                retrofit_folder_path = os.path.join(survey_folder_path, ra_folder)
+
+            # Check if everything inside is a sub-folder and the number of folders is 2
+            items = [item for item in os.listdir(retrofit_folder_path) if item != '.DS_Store']
+            all_folders = [os.path.isdir(os.path.join(retrofit_folder_path, item)) for item in items]
+            if all(all_folders) and len(all_folders) == 2 and "Property Pics" in items:
+                # Get the folder that isn't Property Pics
+                retrofit_folder_path = os.path.join(
+                    retrofit_folder_path, [item for item in items if item != "Property Pics"][0]
+                )
+
+            if os.listdir(retrofit_folder_path):  # If not empty
+                summary_data = extract_retrofit_pdfs(data_folder_path=retrofit_folder_path)
+                if summary_data:
+                    summary_data = {
+                        "survey_folder": survey_folder,
+                        **summary_data,
+                    }
+                    extracted_data.append(summary_data)
+                    continue
+            else:
+                # Then we have an empty Retrofit Assessment folder
+                continue
+
+        # If no retrofit folder or it was empty, check files in survey_folder
+        summary_data = extract_retrofit_pdfs(data_folder_path=survey_folder_path)
+        if not summary_data:
+            if len(survey_subfolders) == 1:
+                survey_folder_path = os.path.join(survey_folder_path, survey_subfolders[0])
+                summary_data = extract_retrofit_pdfs(data_folder_path=survey_folder_path)
+
+        if summary_data:
+            summary_data = {
+                "survey_folder": survey_folder,
+                **summary_data,
+            }
+            extracted_data.append(summary_data)
+
+    retrofit_assessment_data = pd.DataFrame(extracted_data)
+    mtp_df = pd.DataFrame(mtp_extracted_data)
+
+    # Save
+    # retrofit_assessment_data.to_csv(
+    #     os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 5.csv"), index=False
+    # )
+    # mtp_df.to_csv(
+    #     os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 5.csv"), index=False
+    # )
+    retrofit_assessment_data = pd.read_csv(
+        os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/Retrofit Assessment Data Sheet 5.csv"),
+    )
+    mtp_df = pd.read_csv(
+        os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project/MTP Data Sheet 5.csv"),
+    )
+
+    # There are a few duplicates we just manually drop
+    mtp_df = mtp_df.drop_duplicates()
+    mtp_df = mtp_df[
+        ~((
+              mtp_df["survey_folder"] == "Wave 2.1 Surveys - 2/1. Herefordshire/(043) Manor Fields 27"
+          ) & (~mtp_df["has_pv"]))
+    ]
+
+    mtp_df = mtp_df[
+        ~((
+              mtp_df["survey_folder"] == "Wave 2.1 Surveys - 2/2. Bedfordshire/(147) Gilpin Close 5"
+          ) & (~mtp_df["has_pv"]))
+    ]
+
+    # Remove some definite duplicates
+    dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].duplicated()]["Address"]
+    dupes = retrofit_assessment_data[retrofit_assessment_data["Address"].isin(dupes)]
+    dupes = dupes.sort_values("Address")
+    # Get all of the folders that end with ROSS
+    to_drop = dupes[dupes["survey_folder"].str.endswith("ROSS")]["survey_folder"].unique().tolist()
+
+    # Replace \n with ""
+    retrofit_assessment_data["Postcode"] = retrofit_assessment_data["Postcode"].str.replace("\n", "")
+
+    retrofit_assessment_data = retrofit_assessment_data[
+        ~retrofit_assessment_data["survey_folder"].isin(
+            [
+                "StonewaterSurveys_10/4 Beech Road, LUTON, LU1 1DP ROSS",
+                "StonewaterSurveys_2/135 Runley Road, LUTON, LU1 1TX ROSS",
+                "StonewaterSurveys_13/7 Saxon Road, LUTON, LU3 1JR ROSS"
+            ] + to_drop
+        )
+    ]
+
+    retrofit_assessments_data_columns = [
+        'Current SAP Rating', 'Current EPC Band', 'Primary Energy Use (kWh/yr)',
+        'Primary Energy Use Intensity (kWh/m2/yr)', 'Number of Storeys',
+        'Fuel Bill', 'Window Age Description',
+        'Window Age Description Proportion (%)',
+        'Secondary Window Age Description',
+        'Secondary Window Age Description Proportion (%)', 'Number of Windows',
+        'Total Number of Doors', 'Number of Insulated Doors',
+        'Existing Primary Heating System',
+        'Existing Primary Heating PCDF Reference',
+        'Existing Primary Heating Controls',
+        'Existing Primary Heating % of Heat',
+        'Existing Secondary Heating System',
+        'Existing Secondary Heating PCDF Reference',
+        'Existing Secondary Heating Controls',
+        'Existing Secondary Heating % of Heat', 'Secondary Heating Code',
+        'Water Heating Code', 'Total Floor Area (m2)',
+        'Total Ground Floor Area (m2)', 'RIR Floor Area',
+        'Main Building Wall Area (m2)', 'First Extension Wall Area (m2)',
+        'Number of Light Fittings', 'Number of LEL Fittings',
+        'Number of fittings needing LEL', 'Main Roof Type',
+        'Main Roof Insulation', 'Main Roof Insulation Thickness',
+        'Main Wall Type', 'Main Wall Insulation', 'Main Wall Dry-lining',
+        'Main Wall Thickness', 'Main Building Alternative Wall Type',
+        'Main Building Alternative Wall Insulation',
+        'Main Building Alternative Wall Dry-lining',
+        'Main Building Alternative Wall Thickness',
+        'Main Fuel',
+        'Main Building Age Band',
+    ]
+    # For the columns in retrofit_assessments_data_columns, prefix all of them with Survey:
+    retrofit_assessments_data_columns_prefixed = ["Survey: " + x for x in retrofit_assessments_data_columns]
+    rename_dict = dict(zip(retrofit_assessments_data_columns, retrofit_assessments_data_columns_prefixed))
+    retrofit_assessment_data = retrofit_assessment_data.rename(columns=rename_dict)
+    retrofit_assessment_data["Survey: Current EPC Band"] = (
+        retrofit_assessment_data["Survey: Current SAP Rating"].apply(lambda x: sap_to_epc(x))
+    )
+
+    # We can read in the data as needed
+
+    # Next Step: Read in the coordinated measures and match to the extracted data
+    ############################################################
+    # CCS
+    #############################################################
+    ccs_coordination_sheet = pd.read_excel(
+        os.path.join(
+            CUSTOMER_FOLDER_PATH,
+            "Jan 2025 Project",
+            "CCS_Installation_Compliance_-_Stonewater_SHDF_2_1_1738228227.xlsx"
+        ),
+        header=4
+    )
+    ccs_postcodes = pd.read_excel(
+        os.path.join(
+            CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "CCS_Installation_Compliance_CCS.xlsx"
+        ),
+        header=4
+    )
+    ccs_coordination_sheet = ccs_postcodes[['Name', 'Post Code', 'Asset ID', 'Asset ID.1']].merge(
+        ccs_coordination_sheet, how="left", on="Name"
+    )
+    ccs_coordination_sheet = ccs_coordination_sheet[~pd.isnull(ccs_coordination_sheet["Name"])]
+    ccs_coordination_sheet["contractor"] = "CCS"
+    # We split ccs into two sections: the last 21 rows (removed from programme) and the first 87 rows
+    ccs_coordination_removed_from_programme = ccs_coordination_sheet.tail(21)
+    ccs_coordination_sheet = ccs_coordination_sheet.head(87)
+    ccs_coordination = pd.concat([ccs_coordination_removed_from_programme, ccs_coordination_sheet])
+
+    ccs_coordination["folder_path"] = ccs_coordination["Sharepoint Link"].apply(lambda x: extract_sharepoint_url(x))
+
+    ############################################################
+    # WATES
+    #############################################################
+    wates_coordination_sheet = pd.read_excel(
+        os.path.join(
+            CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_1738229226.xlsx"
+        ),
+        header=4
+    )
+    wates_postcodes = pd.read_excel(
+        os.path.join(
+            CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "Stonewater_SAP_Installation_Compliance_Vinci-Wates.xlsx"
+        ),
+        header=4
+    )
+    wates_postcodes = wates_postcodes[~pd.isnull(wates_postcodes["Post Code"])]
+    wates_coordination_sheet = wates_coordination_sheet.merge(
+        wates_postcodes[['Name', 'Post Code', 'Asset ID']].drop_duplicates(), how="left", on="Name"
+    )
+
+    wates_coordination_sheet["contractor"] = "Wates"
+    # Break into the different sites:
+    # Wiltshire
+    wates_coordination_sheet_wiltshere = wates_coordination_sheet.head(267)
+    wates_coordination_sheet_herefordshire = wates_coordination_sheet.iloc[271:332, :]
+    wates_coordination_sheet_coventry = wates_coordination_sheet.iloc[336:409, :]
+    wates_coordination_sheet_bedfordshire = wates_coordination_sheet.iloc[413:520, :]
+    wates_coordination_sheet_bournemouth = wates_coordination_sheet.iloc[524:567, :]
+    wates_coordination_sheet_cambridgeshire = wates_coordination_sheet.iloc[571:581, :]
+    wates_coordination_sheet_removed_from_programme = wates_coordination_sheet.iloc[586:926, :]
+    wates_coordination_sheet_abeyance = wates_coordination_sheet.iloc[930:972, :]
+
+    wates_coordination = pd.concat(
+        [
+            wates_coordination_sheet_wiltshere,
+            wates_coordination_sheet_herefordshire,
+            wates_coordination_sheet_coventry,
+            wates_coordination_sheet_bedfordshire,
+            wates_coordination_sheet_bournemouth,
+            wates_coordination_sheet_cambridgeshire,
+            wates_coordination_sheet_removed_from_programme,
+            wates_coordination_sheet_abeyance
+        ]
+    )
+    # We correct the Asset ID for 34 Kempster Close
+    wates_coordination["Asset ID"] = np.where(
+        wates_coordination["Name"] == "34 Kempster Close",
+        "12005",
+        wates_coordination["Asset ID"]
+    )
+
+    # We fill the missing ids
+    missing_lookup = {
+        "4  Sydnall Fields": 31231,
+        "12  Sydnall Fields": 31239,
+        "12  Athena Gardens": 28061,
+        "49  Banner Lane": 41189,
+        "4  Jonathan Road": 41232,
+        "8  Jonathan Road": 41236,
+        "1  Jonathan Road": 41229,
+        "96  Taunton Way": 31417,
+        "94  Taunton Way": 31418,
+        "1  Lady Lane": 29430,
+        "10  Jonathan Road": 41283,
+        "21  Jonathan Road": 41246,
+        "12  Ashcroft Close": 26399
+    }
+    for name, asset_id in missing_lookup.items():
+        wates_coordination["Asset ID"] = np.where(
+            wates_coordination["Name"] == name,
+            asset_id,
+            wates_coordination["Asset ID"]
+        )
+
+    wates_coordination = wates_coordination[~pd.isnull(wates_coordination["Asset ID"])]
+
+    wates_coordination["folder_path"] = wates_coordination["Sharepoint Folder"].apply(
+        lambda x: extract_sharepoint_url(x)
+    )
+
+    ############################################################
+    # NEW 450 COORDINATED RETROFIT ASSESSMENTS
+    #############################################################
+    features = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
+        "master sheet.csv",
+        encoding='latin1'
+    )
+    features["Address ID"] = features["Address ID"].astype(str).astype(int)
+    features_to_merge = features[["Address ID", "Organisation Reference"]]
+
+    retrofit_packages_board = pd.read_excel(
+        os.path.join(
+            CUSTOMER_FOLDER_PATH,
+            "Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1732034933 Final 19.11.24.xlsx"
+        ),
+        header=4
+    )
+    retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])]
+    # Take just the rows that have been surveyed
+    retrofit_packages_board = retrofit_packages_board[
+        retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
+    ]
+
+    retrofit_packages_board = retrofit_packages_board.merge(
+        features_to_merge, how="left", on="Address ID"
+    )
+
+    manual_filters = {
+        "Flat 21 Walmer Street": "StonewaterSurveys_14/91-1-Flat 21 Walmer Street-HR4 9JD",
+        "6 Cornewall Close": "StonewaterSurveys_14/aa 6, Cornewall Close, Moccas, HEREFORD, HR2 9LG",
+        "2 Bromyard Road": "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ",
+        'Flat 18, 1 Raglan Court': "StonewaterSurveys_13/60-3-18 Raglan Court, 1 Raglan Court-MK41 8QT",
+        '14 Raglan Court, 1 Devizes Avenue': 'StonewaterSurveys_12/55-3-14 Raglan Court, Devizes Avenue-MK41 8QT',
+        '19 South Road': 'StonewaterSurveys_4/19 The Oaks, South Road, SMETHWICK, B67 7BY',
+        'Flat 12 Pelican Lane': 'StonewaterSurveys_1/121-3-Flat 12 Lynton Court, Pelican Lane-RG14 1NN',
+        'Flat C, 44 St Leonards Avenue': 'StonewaterSurveys_11/427-2-44c St. Leonards Avenue-MK42 0RB',
+        '16 The Crescent, Kington': 'StonewaterSurveys_9/360-3-16 The Crescent-HR5 3AS',
+        '2 School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA',
+        '14 South Road': 'StonewaterSurveys_2/14 The Oaks, South Road, SMETHWICK, B67 7BY',
+        '1 Groves Street': 'StonewaterSurveys_4/19-5-1 Groves Street-SN2 2BW',
+        '2 Calshot Walk': 'StonewaterSurveys_3/156-3-2 Calshot Walk-MK41 8QS',
+        '21 Constitution Hill': 'StonewaterSurveys_1/112-11-21 Constitution Hill-BH14 0PX',
+        '22 Constitution Hill': 'StonewaterSurveys_4/185-8-22 Constitution Hill-BH14 0PX',
+        '2 Marches Cottages, School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA',
+        '26, Copthorn House, Brighton Road': 'StonewaterSurveys_15/133-1-26 Brighton Road-KT20 6BQ',
+        '4, Old St Marys, Ripley Lane': "StonewaterSurveys_15/433-3-4 Ripley Lane-KT24 6JG",
+        '1 Nelson House, Short Street': 'StonewaterSurveys_15/89-2-1 Short Street-GU11 1HX',
+        "18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX',
+        '3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX',
+        '16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ',
+        '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX',
+        '7 Croft Street': 'StonewaterSurveys_8/333-2-7 Croft Street-HR6 8LA'
+    }
+
+    # We now match this retrofit packages board to the extracted data
+    matching_lookup = []
+    for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
+
+        # Handle the case that has the wrong postcode in the asset data
+        if home["Name"] in manual_filters:
+            filtered = retrofit_assessment_data[
+                retrofit_assessment_data["survey_folder"] == manual_filters[home["Name"]]
+                ].copy()
+        else:
+            filtered = retrofit_assessment_data[
+                retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower()
+                ].copy()
+
+            # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces
+            to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
+                home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
+            )
+            if to_filter.sum() == 0:
+                to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.replace(",", "").str.replace(".",
+                                                                                                                   "").str.contains(
+                    home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
+                )
+            filtered = filtered[to_filter]
+
+        if filtered.empty:
+            continue
+
+        if filtered.shape[0] == 1:
+            matching_lookup.append(
+                {
+                    "survey_folder": filtered["survey_folder"].values[0],
+                    "Address ID": home["Address ID"],
+                    "Name": home["Name"]
+                }
+            )
+            continue
+
+        # home["Name"] should be contained in the survey_folder
+        filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)]
+        # We have an edge case where some properties have two outputs in Sharepoint
+        if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ":
+            raise Exception("Fix me1")
+            # filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
+
+        if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB':
+            raise Exception("Fix me2")
+            # filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"]
+
+        if home["Name"] == '2 Bromyard Road' and home["Postcode"] == 'WR15 8BZ':
+            filtered = filtered[filtered["survey_folder"] == "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ"]
+
+        if filtered.empty:
+            continue
+        if filtered.shape[0] != 1:
+            raise Exception("something went wrong")
+
+        matching_lookup.append(
+            {
+                "survey_folder": filtered["survey_folder"].values[0],
+                "Address ID": home["Address ID"],
+                "Name": home["Name"]
+            }
+        )
+    matching_lookup = pd.DataFrame(matching_lookup)
+
+    ccs_coordination = ccs_coordination.rename(
+        columns={"Post Code": "Postcode"}
+    )
+    ccs_coordination = ccs_coordination[~pd.isnull(ccs_coordination["Postcode"])]
+    ccs_coordination = ccs_coordination[ccs_coordination["Retrofit Assessment"] != "Outstanding"]
+
+    ccs_manual_filters = {
+        "35 Kittiwake Close": f"{wave_21_folder_name}/11. CCS Dorset/Kittiwake Close 35"
+    }
+    ccs_matching_lookup = []
+    for _, home in tqdm(ccs_coordination.iterrows(), total=len(ccs_coordination)):
+
+        # Handle the case that has the wrong postcode in the asset data
+        if home["Name"] in ccs_manual_filters:
+            filtered = retrofit_assessment_data[
+                retrofit_assessment_data["survey_folder"] == ccs_manual_filters[home["Name"]]
+                ].copy()
+        else:
+            filtered = retrofit_assessment_data[
+                retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower()
+                ].copy()
+
+            # We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces
+            to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
+                home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
+            )
+            if to_filter.sum() == 0:
+                to_filter = (
+                    filtered["survey_folder"].
+                    str.replace(r"[^\w\s]", "").
+                    str.replace(",", "").
+                    str.replace(".", "").
+                    str.contains(
+                        home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
+                    )
+                )
+            if to_filter.sum() == 0:
+                to_filter = (
+                    filtered["Address"].str.replace("  ,", "").str.split(",").str[0:2].str.join("").str.lower() ==
+                    home["Name"].lower()
+                )
+            if to_filter.sum() == 0:
+                to_filter = (
+                    filtered["Address"].str.replace("  ,", "").str.split(",").str[0:1].str.join("").str.lower() ==
+                    home["Name"].lower()
+                )
+            if to_filter.sum() == 0:
+                # Do a fuzzy match on the name
+                # Find the best filter
+                to_filter = filtered["Address"].str.replace("  ,", "").str.split(",").str[0:2].str.join("").apply(
+                    lambda x: fuzz.partial_ratio(home["Name"], x) > 93
+                )
+            if to_filter.sum() == 0:
+                # We also have some cases where the name of the survey folder is like "Colville Road 7" and the
+                # property name is actually 7 Colville Road, so we try taking the final part of the address,
+                # splitting on space, and adding it to the front
+                def reformat_survey_folder(x):
+                    filename = x.split("/")[-1]
+                    parts = filename.split(" ")
+                    return " ".join(parts[-1:] + parts[:-1])
+
+                to_filter = (
+                    filtered["survey_folder"].apply(lambda x: reformat_survey_folder(x)).str.lower() ==
+                    home["Name"].lower()
+                )
+
+            if to_filter.sum() == 0:
+                raise Exception("Error")
+            filtered = filtered[to_filter]
+
+        if filtered.empty:
+            continue
+
+        if filtered.shape[0] == 1:
+            ccs_matching_lookup.append(
+                {
+                    "survey_folder": filtered["survey_folder"].values[0],
+                    "Asset ID.1": home["Asset ID.1"],
+                    "Name": home["Name"]
+                }
+            )
+            continue
+
+        raise Exception("No match")
+
+    ccs_matching_lookup = pd.DataFrame(ccs_matching_lookup)
+    # We get a match for all records
+    assert ccs_matching_lookup.shape[0] == ccs_coordination.shape[0]
+    assert not pd.isnull(ccs_matching_lookup["Asset ID.1"]).sum()
+    assert not ccs_matching_lookup["Asset ID.1"].duplicated().sum()
+
+    # We do the same for Wates
+    wates_coordination = wates_coordination.rename(
+        columns={"Post Code": "Postcode"}
+    )
+    wates_coordination = wates_coordination[
+        wates_coordination["Retrofit Assessment"].isin(["Completed"])
+    ]
+    wates_coordination = wates_coordination[
+        ~pd.isnull(wates_coordination["Postcode"])
+    ]
+
+    wates_manual_filters = {
+        "24 Rabley Wood View": f"{wave_21_folder_name}/3. Wiltshire/24-25 Rabley Wood View",
+        "14 Edencroft": f"{wave_21_folder_name}/3. Wiltshire/14 Edencroft",
+        "Flat 31 Rabley Wood View": f"{wave_21_folder_name}/3. Wiltshire/Flat 31  Rabley Wood View",
+        'Flat 13, Manor Fields': f'{wave_21_folder_name}/1. Herefordshire/(038) Manor Fields Flat 13',
+        "4 Kittys Lane": f"{wave_21_folder_name}/1. Herefordshire/(005) Kittys Lane 4",
+        '1 Jephson Court': f'{wave_21_folder_name}/5. Coventry/Jesphson Court 1',
+        '2 Jephson Court': f'{wave_21_folder_name}/5. Coventry/Jesphson Court 2',
+    }
+    wates_matching_lookup = []
+    # Examples to skip when we cannot get the data
+    wates_to_skip = [
+        "66 Abbatt Close",  # File type is unusual, couldn't extract the data
+        "Flat 69 Goddard Road",  # Doesn't exist
+        "19 Garth House",  # # File type is unusual, couldn't extract the data
+        '5 Gilpin Close',  # No properly formatted EPR
+        '49 The Hide, Netherfield',  # TODO: TEMP HERE
+        '19 Chanders Rd',
+        '5 Chanders Rd',
+        '23 Chanders Rd',
+        '3 Chanders Rd',
+        '1 Orchard Close',
+    ]
+    wates_coordination = wates_coordination[~wates_coordination["Name"].isin(wates_to_skip)]
+
+    for _, home in tqdm(wates_coordination.iterrows(), total=len(wates_coordination)):
+
+        # Search the folder
+        filtered = retrofit_assessment_data[
+            retrofit_assessment_data["survey_folder"].str.contains(home["folder_path"], regex=False)
+        ]
+        if len(filtered) == 1:
+            wates_matching_lookup.append(
+                {
+                    "survey_folder": filtered["survey_folder"].values[0],
+                    "Asset ID": home["Asset ID"],
+                    "Name": home["Name"]
+                }
+            )
+            continue
+
+        if home["Name"] in wates_to_skip:
+            continue
+
+        # Handle the case that has the wrong postcode in the asset data
+        if home["Name"] in wates_manual_filters:
+            filtered = retrofit_assessment_data[
+                retrofit_assessment_data["survey_folder"] == wates_manual_filters[home["Name"]]
+                ].copy()
+        else:
+            filtered = retrofit_assessment_data[
+                retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower()
+                ].copy()
+
+            # home["Name"] (less "Flat") must be in survey_folder. NOTE(review): str.replace() on a str is not regex
+            to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
+                home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
+            )
+
+            if to_filter.sum() > 1:
+                to_filter = (
+                    filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.split("/").str[-1].str.lower() ==
+                    home["Name"].replace(r"[^\w\s]", "").lstrip().lower()
+                )
+
+            if to_filter.sum() == 0:
+                to_filter = (
+                    filtered["survey_folder"].
+                    str.replace(r"[^\w\s]", "").
+                    str.replace(",", "").
+                    str.replace(".", "").
+                    str.contains(
+                        home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
+                    )
+                )
+            if to_filter.sum() == 0:
+                to_filter = (
+                    filtered["Address"].str.replace("  ,", "").str.split(",").str[0:2].str.join("").str.lower() ==
+                    home["Name"].lower()
+                )
+            if to_filter.sum() == 0:
+                to_filter = (
+                    filtered["Address"].str.replace("  ,", "").str.split(",").str[0:1].str.join("").str.lower() ==
+                    home["Name"].lower()
+                )
+            if to_filter.sum() == 0:
+                # Do a fuzzy match on the name
+                # Find the best filter
+                to_filter = filtered["Address"].str.replace("  ,", "").str.split(",").str[0:2].str.join("").apply(
+                    lambda x: fuzz.partial_ratio(home["Name"], x) > 93
+                )
+            if to_filter.sum() == 0:
+                # We also have some cases where the name of the survey folder is like "Colville Road 7" and the
+                # property name is actually 7 Colville Road, so we try taking the final part of the address,
+                # splitting on space, and adding it to the front
+                def reformat_survey_folder(x):
+                    # Rotate the last path segment so its final word leads: "Colville Road 7" -> "7 Colville Road"
+                    *leading_words, final_word = x.split("/")[-1].split(" ")
+                    return " ".join([final_word, *leading_words])
+
+                to_filter = (
+                    filtered["survey_folder"].apply(lambda x: reformat_survey_folder(x)).str.lower() ==
+                    home["Name"].lower()
+                )
+
+            if to_filter.sum() == 0:
+                raise Exception("Error")
+            filtered = filtered[to_filter]
+
+        if filtered.empty:
+            continue
+
+        if filtered.shape[0] == 1:
+            wates_matching_lookup.append(
+                {
+                    "survey_folder": filtered["survey_folder"].values[0],
+                    "Asset ID": home["Asset ID"],
+                    "Name": home["Name"]
+                }
+            )
+            continue
+
+        raise Exception("No match")
+    wates_matching_lookup = pd.DataFrame(wates_matching_lookup)
+
+    # We get a match for all records
+    assert wates_matching_lookup.shape[0] == wates_coordination.shape[0]
+    assert not pd.isnull(wates_matching_lookup["Asset ID"]).sum()
+    assert not wates_matching_lookup["Asset ID"].duplicated().sum()
+
+    # Merge lookup tables onto the coordination sheets
+    wates_coordination = wates_coordination.merge(
+        wates_matching_lookup, how="left", on="Name"
+    )
+    missed_asset_id = wates_coordination[pd.isnull(wates_coordination["Asset ID_x"])]
+    if not missed_asset_id.empty:
+        raise Exception("Missing Asset ID")
+
+    if wates_coordination["Asset ID_x"].duplicated().sum():
+        raise Exception("Duplicated IDs in wates")
+
+    # We merge the mtp data on to the wates coordination
+    wates_coordination = wates_coordination.merge(
+        mtp_df, how="left", on="survey_folder"
+    )
+
+    ccs_coordination = ccs_coordination.merge(
+        ccs_matching_lookup, how="left", on="Name"
+    )
+    ccs_coordination = ccs_coordination.merge(
+        mtp_df, how="left", on="survey_folder"
+    )
+
+    retrofit_packages_board = retrofit_packages_board.merge(
+        matching_lookup, how="left", on="Name"
+    )
+
+    # We now map the retrofit assessment data to the coordinated packages
+    wates_coordination = wates_coordination.merge(
+        retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
+    )
+    ccs_coordination = ccs_coordination.merge(
+        retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
+    )
+    retrofit_packages_board = retrofit_packages_board.merge(
+        retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder"
+    )
+
+    # 4 properties on the Wates coordination board are also on the retrofit packages board; drop them from Wates
+    to_remove = wates_coordination[
+        wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"])
+    ]
+    assert to_remove.shape[0] == 4
+    # Remove them from the wates board
+    wates_coordination = wates_coordination[
+        ~wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"])
+    ]
+
+    # Combine the three boards into one frame, renaming the CCS/Wates columns to the retrofit packages board names
+    coordinated_packages = pd.concat(
+        [
+            retrofit_packages_board[
+                [
+                    "Name", "Postcode", 'Actual SAP Band', 'Actual SAP Rating',
+                    'Modelled SAP Band', 'Modelled SAP Rating', 'Package Ref',
+                    'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
+                    'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
+                    'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
+                    'Solar PV', 'Other measures', 'Organisation Reference',
+                ] + retrofit_assessments_data_columns_prefixed
+                ],
+            ccs_coordination[
+                [
+                    # We don't have secondary wall insulation, Flat Roof, RIR, Heating Controls,
+                    # Solar PV
+                    "Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
+                    'SAP Band Install Package', 'Package Approved (Client)',
+                    'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
+                    'Ventilation', 'Heating', 'Other Measures', 'PV System',
+                    "Asset ID.1_y",
+                ] + retrofit_assessments_data_columns_prefixed
+                ].rename(
+                columns={
+                    "SAP Band Pre": "Actual SAP Band",
+                    "SAP Rating Pre": "Actual SAP Rating",
+                    'SAP Rating Install Package': 'Modelled SAP Rating',
+                    'SAP Band Install Package': 'Modelled SAP Band',
+                    'Package Approved (Client)': 'Package Ref',
+                    'Wall Insulation': 'Main Wall Insulation',
+                    'Loft Insulation': 'Loft insulation',
+                    'Windows Upgrade': 'Window Upgrade',
+                    'Ext. Doors Upgrade': 'Door Upgrade',
+                    'Heating': 'Main Heating',
+                    'Other Measures': 'Other measures',
+                    'Asset ID.1_y': 'Organisation Reference',
+                    "PV System": "Solar PV",
+                }
+            ),
+            wates_coordination[
+                [
+                    "Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
+                    'SAP Band Install Package', 'Package Approved (Client)',
+                    'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
+                    'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x', "PV System"
+                ] + retrofit_assessments_data_columns_prefixed
+                ].rename(
+                columns={
+                    "SAP Band Pre": "Actual SAP Band",
+                    "SAP Rating Pre": "Actual SAP Rating",
+                    'SAP Rating Install Package': 'Modelled SAP Rating',
+                    'SAP Band Install Package': 'Modelled SAP Band',
+                    'Package Approved (Client)': 'Package Ref',
+                    'Wall Insulation': 'Main Wall Insulation',
+                    'Loft Insulation': 'Loft insulation',
+                    'Windows Upgrade': 'Window Upgrade',
+                    'Ext. Doors Upgrade': 'Door Upgrade',
+                    'Heating': 'Main Heating',
+                    'Other Measures': 'Other measures',
+                    'Asset ID_x': 'Organisation Reference',
+                    "PV System": "Solar PV",
+                }
+            )
+        ]
+    )
+
+    coordinated_packages["Organisation Reference"] = coordinated_packages["Organisation Reference"].astype(int)
+    assert not coordinated_packages["Organisation Reference"].duplicated().sum()
+
+    # Merge the property features on
+    coordinated_packages = coordinated_packages.merge(
+        features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]],
+        how="left",
+        on="Organisation Reference"
+    )
+
+    coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current EPC Band"])]
+    coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current SAP Rating"])]
+
+    # We need the features pertaining to these priority postcodes
+
+    def find_nearest_matching_property(coordinated_packages, home):
+        """
+        Find surveyed properties resembling `home`, trying the strictest attribute set first.
+
+        :param coordinated_packages: DataFrame of surveyed properties to search.
+        :param home: Row for the property to match; indexed by the same column names.
+        :return: (matching rows, confidence) - 2..7 for the filter levels below (strictest first), one more for
+                 an "Archetype ID"-only match - or (None, None) when nothing matches.
+        """
+        filter_levels = [
+            (["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 2),
+            (["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 3),
+            (["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 4),
+            (["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 5),
+            (["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 6),
+            (["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 7),
+        ]
+        for filters, match_confidence in filter_levels:
+            # Narrow the frame column by column; no up-front copy of the whole frame is needed
+            match = coordinated_packages
+            for col in filters:
+                match = match[match[col] == home[col]]
+            if not match.empty:
+                return match, match_confidence
+        # Finally, we search for a property in the same Archetype
+        match = coordinated_packages[coordinated_packages["Archetype ID"] == home["Archetype ID"]]
+        return (match, max(c for _, c in filter_levels) + 1) if not match.empty else (None, None)
+
+    coordinated_packages["Postal Region"] = coordinated_packages["Postcode"].str.split(" ").str[0].str.strip()
+    new_priority_postcodes["Postal Region"] = new_priority_postcodes["Postcode"].str.split(" ").str[0].str.strip()
+
+    coordinated_packages["Roof Simple"] = coordinated_packages["Roofs"].str.split(":").str[0].str.strip()
+    new_priority_postcodes["Roof Simple"] = new_priority_postcodes["Roofs"].str.split(":").str[0].str.strip()
+
+    coordinated_packages["Primary Property Type"] = coordinated_packages["Property Type"].str.split(":").str[0]
+    new_priority_postcodes["Primary Property Type"] = new_priority_postcodes["Property Type"].str.split(":").str[0]
+
+    coordinated_packages = coordinated_packages.merge(
+        new_priority_postcodes[["Organisation Reference", "Archetype ID"]],
+        how="left",
+        on="Organisation Reference"
+    )
+
+    # For every property in the priority postcodes data, we look for a most appropriate matching property
+    no_match = []
+    matches = []
+    for _, home in tqdm(new_priority_postcodes.iterrows(), total=len(new_priority_postcodes)):
+        # We check if the property was surveyed
+        survey_result = coordinated_packages[
+            coordinated_packages["Organisation Reference"] == home["Organisation Reference"]
+            ]
+        if not survey_result.empty:
+            to_extend = [
+                {
+                    "Organisation Reference": home["Organisation Reference"],
+                    "Best Match Organisation Reference": m,
+                    "match_confidence": 1,
+                    "Was Surveyed": True
+                } for m in survey_result["Organisation Reference"].values
+            ]
+            matches.extend(to_extend)
+            continue
+
+        closest_match, match_confidence = find_nearest_matching_property(coordinated_packages, home)
+        if closest_match is None:
+            no_match.append(home["Organisation Reference"])
+            continue
+
+        to_extend = [
+            {
+                "Organisation Reference": home["Organisation Reference"],
+                "Best Match Organisation Reference": m,
+                "match_confidence": match_confidence,
+                "Was Surveyed": False
+            } for m in closest_match["Organisation Reference"].values
+        ]
+        matches.extend(to_extend)
+
+    no_match_summary = new_priority_postcodes[
+        new_priority_postcodes["Organisation Reference"].isin(
+            no_match
+        )
+    ].groupby(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"])[
+        "Organisation Reference"].count().reset_index()
+
+    no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False)
+
+    # len(no_match)
+    # 8764, 5607, 5646, 5071
+    # no_match_summary.shape
+    # (3953, 6), (2948, 6), (2969, 7), (2575, 7)
+
+    matches_df = pd.DataFrame(matches)
+
+    matches_df = matches_df.merge(
+        coordinated_packages[["Organisation Reference", "Survey: Current EPC Band", "Survey: Current SAP Rating"]],
+        left_on="Best Match Organisation Reference", right_on="Organisation Reference",
+        suffixes=("", " - Closest Match")
+    )
+
+    measures_columns = [
+        'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
+        'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
+        'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
+        'Solar PV', 'Other measures'
+    ]
+
+    # We want to aggregate the matches, when we have multiple
+    aggregated_matches_df = []
+    for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
+
+        measures = coordinated_packages[
+            (
+                coordinated_packages["Organisation Reference"].isin(
+                    mapped_matches['Best Match Organisation Reference'].values
+                )
+            )
+        ][measures_columns]
+
+        if mapped_matches.shape[0] == 1:
+            # Get the measures for this property
+            measures = measures.squeeze()
+
+            aggregated_matches_df.append(
+                {
+                    "Organisation Reference": org_ref,
+                    "Number of matches": 1,
+                    "Proportion": 100,
+                    "Estimated SAP Rating": mapped_matches["Survey: Current SAP Rating"].values[0],
+                    "Estimated EPC Rating": mapped_matches["Survey: Current EPC Band"].values[0],
+                    "Was Surveyed": mapped_matches["Was Surveyed"].values[0],
+                    **measures
+                }
+            )
+            continue
+
+        # We need to aggregate the matches, since we have multiple
+        average_rating = mapped_matches["Survey: Current SAP Rating"].mean()
+        number_of_matches = mapped_matches.shape[0]
+        average_epc_rating = sap_to_epc(average_rating)
+        # proportion is the number of properties that have this EPC rating
+        proportion_with_this_epc = int(
+            mapped_matches[mapped_matches["Survey: Current EPC Band"] == average_epc_rating].shape[
+                0] / number_of_matches * 100
+        )
+
+        measures_aggregated = {}
+        for m in measures_columns:
+            if any(~pd.isnull(measures[m])):
+                # Check if we have 2 unique values
+                vals = measures[~pd.isnull(measures[m])][m].unique()
+                if len(vals) > 1:
+                    measures_aggregated[m] = ", ".join(vals)
+                else:
+                    measures_aggregated[m] = vals[0]
+
+        aggregated_matches_df.append(
+            {
+                "Organisation Reference": org_ref,
+                "Number of matches": number_of_matches,
+                "Proportion": proportion_with_this_epc,
+                "Estimated SAP Rating": average_rating,
+                "Estimated EPC Rating": average_epc_rating,
+                "Was Surveyed": False,
+                **measures_aggregated
+            }
+        )
+
+    aggregated_matches_df = pd.DataFrame(aggregated_matches_df)
+
+    mapped_priority_list = new_priority_postcodes.merge(
+        aggregated_matches_df, on="Organisation Reference", how="left"
+    )
+
+    mapped_priority_list["address1"] = mapped_priority_list["Address"].str.split(",").str[0]
+
+    # If we have a leading number like 01, 02, 03, 04, 05, 06, 07, 08, 09, we remove the leading 0
+    def remove_leading_zero(address):
+        """Strip the 0 from a leading "01".."09" house number; non-strings (e.g. NaN) are returned unchanged."""
+        return re.sub(r"^0([1-9]) ", r"\1 ", address) if isinstance(address, str) else address
+
+    mapped_priority_list["address1"] = mapped_priority_list["address1"].apply(remove_leading_zero)
+    mapped_priority_list["address1"] = np.where(
+        mapped_priority_list["Organisation Reference"] == 37004,
+        "8 Mason Road",
+        mapped_priority_list["address1"]
+    )
+    mapped_priority_list["address1"] = np.where(
+        mapped_priority_list["Organisation Reference"] == 37003,
+        "9 Mason Road",
+        mapped_priority_list["address1"]
+    )
+
+    mapped_priority_list = mapped_priority_list.rename(
+        columns={"UPRN": "uprn"}
+    )
+    mapped_priority_list["row_id"] = mapped_priority_list["Organisation Reference"]
+
+    # Flag where 2 out of the three columns have consensus
+    mapped_priority_list["2 of 3 Data Sources Have Consensus on EPC"] = (
+        (mapped_priority_list["SAP Band"] == mapped_priority_list["EPC Band"]) |
+        (mapped_priority_list["SAP Band"] == mapped_priority_list["Estimated EPC Rating"]) |
+        (mapped_priority_list["EPC Band"] == mapped_priority_list["Estimated EPC Rating"])
+    )
+
+    # Let's get the newest EPC data for these properties
+    # We merge on UPRN, when we have it
+    # from etl.route_march_data_pull.app import get_data
+    # epc_data, errors, nodata = get_data(
+    #     asset_list=mapped_priority_list,
+    #     fulladdress_column="Address",
+    #     address1_column="address1",
+    #     postcode_column="Postcode",
+    #     manual_uprn_map={},
+    #     epc_api_only=True
+    # )
+    #
+    # epc_df = pd.DataFrame(epc_data)
+    # epc_df.to_csv(
+    #     os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv"), index=False
+    # )
+    epc_df = pd.read_csv(os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv"))
+    epc_df = epc_df.rename(columns={"row_id": "Organisation Reference"})
+
+    # We now package up the data
+
+    # Sheet 1 is the base coordination data
+    output_coordination_sheet = coordinated_packages[
+        [
+            "Name", "Postcode", 'Organisation Reference', 'Package Ref',
+            'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
+            'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
+            'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
+            'Solar PV', 'Other measures',
+            'Survey: Current SAP Rating',
+            'Survey: Current EPC Band',
+            'Survey: Primary Energy Use (kWh/yr)',
+            'Survey: Primary Energy Use Intensity (kWh/m2/yr)',
+            'Survey: Number of Storeys', 'Survey: Fuel Bill',
+            'Survey: Window Age Description',
+            'Survey: Window Age Description Proportion (%)',
+            'Survey: Secondary Window Age Description',
+            'Survey: Secondary Window Age Description Proportion (%)',
+            'Survey: Number of Windows', 'Survey: Total Number of Doors',
+            'Survey: Number of Insulated Doors',
+            'Survey: Existing Primary Heating System',
+            'Survey: Existing Primary Heating PCDF Reference',
+            'Survey: Existing Primary Heating Controls',
+            'Survey: Existing Primary Heating % of Heat',
+            'Survey: Existing Secondary Heating System',
+            'Survey: Existing Secondary Heating PCDF Reference',
+            'Survey: Existing Secondary Heating Controls',
+            'Survey: Existing Secondary Heating % of Heat',
+            'Survey: Secondary Heating Code', 'Survey: Water Heating Code',
+            'Survey: Total Floor Area (m2)', 'Survey: Total Ground Floor Area (m2)',
+            'Survey: RIR Floor Area', 'Survey: Main Building Wall Area (m2)',
+            'Survey: First Extension Wall Area (m2)',
+            'Survey: Number of Light Fittings', 'Survey: Number of LEL Fittings',
+            'Survey: Number of fittings needing LEL', 'Survey: Main Roof Type',
+            'Survey: Main Roof Insulation',
+            'Survey: Main Roof Insulation Thickness', 'Survey: Main Wall Type',
+            'Survey: Main Wall Insulation', 'Survey: Main Wall Dry-lining',
+            'Survey: Main Wall Thickness',
+            'Survey: Main Building Alternative Wall Type',
+            'Survey: Main Building Alternative Wall Insulation',
+            'Survey: Main Building Alternative Wall Dry-lining',
+            'Survey: Main Building Alternative Wall Thickness',
+            'Survey: Main Fuel',
+            'Survey: Main Building Age Band',
+            'Walls', 'Roofs', 'Heating', 'Main Fuel', 'Age', 'Property Type'
+        ]
+    ].rename(
+        columns={
+            'Walls': "Parity - Walls",
+            'Roofs': "Parity - Roof",
+            'Heating': "Parity - Heating",
+            'Main Fuel': "Parity - Fuel",
+            'Age': "Parity - Age Band",
+            'Property Type': "Parity - Property Type"
+        }
+    )
+
+    # Sheet 2 is the lookup table which maps the properties to their closest match
+    # We need to bring in the parity attributes between the mapped properties so we can see side-by-side
+    mapped_lookup = matches_df[
+        [
+            'Organisation Reference',
+            'Best Match Organisation Reference',
+            'Survey: Current EPC Band',
+            'Survey: Current SAP Rating',
+            "Was Surveyed",
+            "match_confidence",
+        ]
+    ].rename(
+        columns={
+            'Best Match Organisation Reference': "Best Match - Organisation Reference",
+            "Survey: Current EPC Band": "Best Match - Survey: Current EPC Band",
+            'Survey: Current SAP Rating': "Best Match - Survey: Current SAp Rating"
+        }
+    ).merge(
+        features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type",
+                  "Total Floor Area"]],
+        how="left",
+        on="Organisation Reference"
+    ).merge(
+        features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type",
+                  "Total Floor Area"]].rename(
+            columns={
+                "Organisation Reference": "Best Match - Organisation Reference",
+                "Walls": "Best Match - Walls",
+                "Roofs": "Best Match - Roof",
+                "Heating": "Best Match - Heating",
+                "Main Fuel": "Best Match - Main Fuel",
+                "Age": "Best Match - Age",
+                "Property Type": "Best Match - Property Type",
+                "Total Floor Area": "Best Match - Total Floor Area"
+            }
+        ),
+        how="left",
+        on="Best Match - Organisation Reference"
+    ).merge(
+        coordinated_packages[
+            [
+                "Organisation Reference", 'Survey: Main Wall Type', 'Survey: Main Wall Insulation',
+                'Survey: Main Roof Type', 'Survey: Main Roof Insulation', 'Survey: Main Roof Insulation Thickness',
+                'Survey: Existing Primary Heating System', 'Survey: Main Building Age Band',
+                'Survey: Main Building Wall Area (m2)', 'Survey: Total Floor Area (m2)',
+                # 'Survey: Main Building Age Band' is listed once above; repeating it duplicated the sheet column
+            ]
+        ].rename(
+            columns={
+                "Organisation Reference": "Best Match - Organisation Reference",
+                'Survey: Main Wall Type': 'Best Match - Survey: Main Wall Type',
+                'Survey: Main Wall Insulation': 'Best Match - Survey: Main Wall Insulation',
+                'Survey: Main Roof Type': 'Best Match - Survey: Main Roof Type',
+                'Survey: Main Roof Insulation': 'Best Match - Survey: Main Roof Insulation',
+                'Survey: Main Roof Insulation Thickness': 'Best Match - Survey: Main Roof Insulation Thickness',
+                'Survey: Existing Primary Heating System': 'Best Match - Survey: Existing Primary Heating System',
+            }
+        ),
+        how="left",
+        on="Best Match - Organisation Reference"
+    )
+
+    # Finally, we have the property, against the mapped home with the estimate SAP scores and the EPC data
+    worksheet = mapped_priority_list[
+        [
+            'Organisation Reference', 'Address', 'Postcode', 'Address ID', 'uprn', 'Archetype ID',
+            'SAP', 'SAP Band', "Property Type", "Walls", "Roofs", 'Glazing',
+            'Heating', 'Main Fuel', 'Hot Water', 'Number of matches', 'Proportion',
+            'Estimated SAP Rating', 'Estimated EPC Rating', "Was Surveyed",
+            'Main Wall Insulation',
+            'Secondary Wall Insulation', 'Loft insulation', 'Flat Roof',
+            'Room in Roof', 'Window Upgrade', 'Door Upgrade', 'Ventilation',
+            'Main Heating', 'Water Heating', 'Heating Controls', 'Solar PV',
+            'Other measures', "2 of 3 Data Sources Have Consensus on EPC"
+        ]
+    ].rename(
+        columns={
+            "SAP": "Parity - SAP Rating",
+            "SAP Band": "Parity - EPC Rating",
+            "Property Type": "Parity - Property Type",
+            "Walls": "Parity - Walls",
+            "Roofs": "Parity - Roofs",
+            'Glazing': "Parity - Glazing",
+            'Heating': 'Parity - Heating',
+            'Main Fuel': 'Parity - Main Fuel',
+            'Hot Water': 'Parity - Hot Water',
+            'Proportion': 'Proportion of matched properties with same EPC rating',
+        }
+    ).merge(
+        epc_df[
+            [
+                "Organisation Reference",
+                "uprn",
+                "current-energy-efficiency",
+                "current-energy-rating",
+                "lodgement-date",
+                "construction-age-band",
+                "walls-description",
+                "roof-description",
+                "mainheat-description",
+                "windows-description",
+                "hotwater-description",
+                "main-fuel",
+                "total-floor-area",
+            ]
+        ].rename(
+            columns={
+                "uprn": "Last EPC - uprn",
+                "current-energy-efficiency": "Last EPC - SAP Score",
+                "current-energy-rating": "Last EPC - EPC Rating",
+                "lodgement-date": "Last EPC - Date Lodged",
+                "construction-age-band": "Last EPC - Age Band",
+                "walls-description": "Last EPC - Walls",
+                "roof-description": "Last EPC - Roof",
+                "mainheat-description": "Last EPC - Heating",
+                "windows-description": "Last EPC - Windows",
+                "hotwater-description": "Last EPC - Hot Water",
+                "main-fuel": "Last EPC - Main Fuel",
+                "total-floor-area": "Last EPC - Total Floor Area"
+            }
+        ),
+        how="left",
+        on='Organisation Reference'
+    )
+
+    worksheet["Years Since Last EPC"] = pd.Timestamp.now().year - pd.to_datetime(
+        worksheet["Last EPC - Date Lodged"]).dt.year
+
+    worksheet["Last EPC - uprn"] = worksheet["Last EPC - uprn"].astype("Int64").astype(str)
+
+    worksheet["uprn"] = np.where(
+        pd.isnull(worksheet["uprn"]) & pd.notnull(worksheet["Last EPC - uprn"]),
+        worksheet["Last EPC - uprn"],
+        worksheet["uprn"]
+    )
+
+    worksheet["uprn"] = worksheet["uprn"].replace("<NA>", "")
+
+    worksheet = worksheet.drop(columns=["Last EPC - uprn"])
+
+    # Save to Excel with multiple sheets
+    excel_path = os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "13022025 Stonewater Priority List.xlsx")
+    with pd.ExcelWriter(excel_path, engine="xlsxwriter") as writer:
+        worksheet.to_excel(writer, sheet_name="Worksheet", index=False, header=True)
+        mapped_lookup.to_excel(writer, sheet_name="Lookup Table", index=False, header=True)
+        output_coordination_sheet.to_excel(writer, sheet_name="Coordination", index=False, header=True)
 
 # if __name__ == "__main__":
 #     main()
diff --git a/etl/customers/stonewater/data_cleaning.py b/etl/customers/stonewater/data_cleaning.py
index 8751960c..a5da0c79 100644
--- a/etl/customers/stonewater/data_cleaning.py
+++ b/etl/customers/stonewater/data_cleaning.py
@@ -1,6 +1,7 @@
 import os
 import shutil
 from tqdm import tqdm
+from etl.access_reporting.app import SharePointClient
 
 
 def delete_large_files():
@@ -66,13 +67,17 @@ def delete_large_files():
 def download_data_from_sharepoint():
     # Given a sharepoint location, this function will download the retrofit assessment folders from the locations
     # specified in the sharepoint location
-    from etl.access_reporting.app import SharePointClient
+
+    SHAREPOINT_CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID", None)
+    SHAREPOINT_CLIENT_SECRET = os.getenv("SHAREPOINT_CLIENT_SECRET", None)
+    SHAREPOINT_TENANT_ID = os.getenv("SHAREPOINT_TENANT_ID", None)
+    OSMOSIS_SHAREPOINT_SITE_ID = os.getenv("OSMOSIS_SHAREPOINT_SITE_ID", None)
 
     sharepoint_client = SharePointClient(
-        tenant_id="10d5af8b-2cfd-4882-9ccd-b96e4812dacf",
-        client_id="6832a4c5-fb8c-4082-a746-4f51e1020f0d",
-        client_secret="xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ",
-        site_id="bc925a9a-ad0b-4de9-9a3c-e61014cc7489"
+        tenant_id=SHAREPOINT_TENANT_ID,
+        client_id=SHAREPOINT_CLIENT_ID,
+        client_secret=SHAREPOINT_CLIENT_SECRET,
+        site_id=OSMOSIS_SHAREPOINT_SITE_ID
     )
 
     # Retrieve the data from Sharepoint and write to local machine
@@ -81,9 +86,14 @@ def download_data_from_sharepoint():
         folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"
     )
 
-    len(contents["value"])
+    folders_to_keep = [
+        "1. Herefordshire", "2. Bedfordshire", "3. Wiltshire", "4. Bournemouth",
+        "5. Coventry", "6. West Sussex", "7. Dorset", "8. Cambridgeshire",
+        "9. Guildford", "10. Little Island", "11. CCS Dorset",
+    ]
+
     folders_to_pull = [
-        folder for folder in contents["value"] if folder["name"] in ["3. Wiltshire", "4. Bournemouth", "5. Coventry"]
+        folder for folder in contents["value"] if folder["name"] in folders_to_keep
     ]
     for folder_to_pull in folders_to_pull:
         # Get the contents
@@ -103,35 +113,42 @@ def download_data_from_sharepoint():
                 folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders" + "/" +
                             folder_to_pull["name"] + "/" + property_folder["name"]
             )
-            # We look for the retrofit assessment folder:
+            if not property_folder_contents.get("value"):
+                continue
+            # We look for the retrofit assessment folder or mtp folders:
             property_sub_folders = [
-                f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower()
+                f for f in property_folder_contents["value"] if
+                "ra coordinator info" in f["name"].lower() or
+                "retrofit assessment" in f["name"].lower() or
+                "ra info" in f["name"].lower() or
+                "mtp" in f["name"].lower() or
+                "mid-term" in f["name"].lower()
             ]
 
             if not property_sub_folders:
                 continue
 
-            # if we have this, we download the folder and store it on my laptop!
-            property_sub_folder = property_sub_folders[0]
+            for property_sub_folder in property_sub_folders:
+                # if we have this, we download the folder and store it on my laptop!
 
-            property_folder_path = os.path.join(
-                "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders",
-                folder_to_pull["name"],
-                property_folder["name"],
-                property_sub_folder["name"]
-            )
+                property_folder_path = os.path.join(
+                    "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders",
+                    folder_to_pull["name"],
+                    property_folder["name"],
+                    property_sub_folder["name"]
+                )
 
-            download_dir = os.path.join(
-                "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys",
-                folder_to_pull["name"],
-                property_folder["name"],
-                property_sub_folder["name"]
-            )
+                download_dir = os.path.join(
+                    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys - 2",
+                    folder_to_pull["name"],
+                    property_folder["name"],
+                    property_sub_folder["name"]
+                )
 
-            # We download the folder
-            sharepoint_client.download_sharepoint_folder(
-                drive_id=sharepoint_client.document_drive["id"],
-                folder_path=property_folder_path,
-                download_dir=download_dir,
-                excluded_file_types=["MOV"]
-            )
+                # We download the folder
+                sharepoint_client.download_sharepoint_folder(
+                    drive_id=sharepoint_client.document_drive["id"],
+                    folder_path=property_folder_path,
+                    download_dir=download_dir,
+                    excluded_file_types=["MOV", "jpg"]
+                )
diff --git a/etl/customers/stonewater/potential_eco_properties.py b/etl/customers/stonewater/potential_eco_properties.py
index bda9c30c..6666ce15 100644
--- a/etl/customers/stonewater/potential_eco_properties.py
+++ b/etl/customers/stonewater/potential_eco_properties.py
@@ -217,78 +217,7 @@ def app():
     )
     )
 
-    # We get the EPC data
-    # epc_data = json.loads(
-    #     read_from_s3(
-    #         bucket_name="retrofit-data-dev",
-    #         s3_file_name="customers/Stonewater/clustering/epc_data.json"
-    #     )
-    # )
-    # epc_data = pd.DataFrame(epc_data)
-    #
-    # epc_data["uprn"] = np.where(
-    #     epc_data["internal_id"] == 1091,
-    #     83143766,
-    #     epc_data["uprn"]
-    # )
-    #
-    # epc_data_batch_2 = read_pickle_from_s3(
-    #     s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
-    #     bucket_name="retrofit-data-dev"
-    # )
-    # epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
-    #
-    # complete_epcs = pd.concat([epc_data, epc_data_batch_2])
-    #
-    # epcs_to_merge = complete_epcs[
-    #     [
-    #         "uprn",
-    #         "address",
-    #         "postcode",
-    #         "property-type",
-    #         "built-form",
-    #         "inspection-date",
-    #         "current-energy-rating",
-    #         "current-energy-efficiency",
-    #         "roof-description",
-    #         "walls-description",
-    #         "transaction-type",
-    #         "secondheat-description",
-    #         "total-floor-area",
-    #         "construction-age-band",
-    #         "floor-height",
-    #         "number-habitable-rooms",
-    #         "mainheat-description",
-    #         "energy-consumption-current"
-    #     ]
-    # ].rename(
-    #     columns={
-    #         "address": "Address",
-    #         "postcode": "Postcode",
-    #         "inspection-date": "Date of last EPC",
-    #         "current-energy-efficiency": "SAP score on register",
-    #         "current-energy-rating": "EPC rating on register",
-    #         "property-type": "Property Type",
-    #         "built-form": "Archetype",
-    #         "total-floor-area": "Property Floor Area",
-    #         "construction-age-band": "Property Age Band",
-    #         "floor-height": "Property Floor Height",
-    #         "number-habitable-rooms": "Number of Habitable Rooms",
-    #         "walls-description": "Wall Construction",
-    #         "roof-description": "Roof Construction",
-    #         "mainheat-description": "Heating Type",
-    #         "secondheat-description": "Secondary Heating",
-    #         "transaction-type": "Reason for last EPC",
-    #         "energy-consumption-current": "Heat Demand (kWh/m2)",
-    #     }
-    # )
-    # # We de-dupe, taking the newest on the date the EPC was lod
-    # epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
-    # epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
-    # epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
-
     stonewater_cavity_properties["UPRN"] = stonewater_cavity_properties["UPRN"].astype("Int64").astype(str)
-    stonewater_cavity_properties["Reason Included"].value_counts()
     # Find the postcodes where an Osmosis survey revealed a need for CWI
     postcodes_found_needing_cwi = stonewater_cavity_properties[
         stonewater_cavity_properties["Reason Included"].isin(
@@ -339,12 +268,7 @@ def app():
             "Renewables": "Parity - Renewables",
             "Total Floor Area": "Parity - Total Floor Area"
         }
-    )  # .merge(
-    #     epcs_to_merge,
-    #     how="left",
-    #     left_on="UPRN",
-    #     right_on="uprn"
-    # )
+    )
 
     # We now flag the additional properties in the as built list
 
@@ -434,20 +358,20 @@ def app():
 
     additional_properties["Suspected Needs CWI - not surveyed"] = (
         (
-            additional_properties["Postcode"].isin(postcodes_found_needing_cwi)
+            additional_properties["Postcode"].isin(postcodes_found_needing_cwi) &
+            ~additional_properties["Installed under ECO3"]
         )
     )
 
-    additional_properties["Same Postcode as Installed under ECO3"].value_counts()
-
     # We drop Full Address
     additional_properties = additional_properties.drop(columns=["Full Address"])
     additional_properties2 = additional_properties[[
         "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
         "Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area", 'Installed under ECO3',
-        'Same Postcode as Installed under ECO3'
+        'Same Postcode as Installed under ECO3', "Organisation Reference",
     ]].rename(
         columns={
+            "Organisation Reference": "Org. ref.",
             "SAP": "Parity - Predicted SAP",
             "SAP Band": "Parity - Predicted SAP Band",
             "Age": "Parity - Build Age",
@@ -461,65 +385,62 @@ def app():
             "Renewables": "Parity - Renewables",
             "Total Floor Area": "Parity - Total Floor Area"
         }
-    )  # .merge(
-    #     pd.DataFrame(additional_properties_epcs)[
-    #         [
-    #             "row_id",
-    #             "property-type",
-    #             "built-form",
-    #             "inspection-date",
-    #             "current-energy-rating",
-    #             "current-energy-efficiency",
-    #             "roof-description",
-    #             "walls-description",
-    #             "transaction-type",
-    #             "secondheat-description",
-    #             "total-floor-area",
-    #             "construction-age-band",
-    #             "floor-height",
-    #             "number-habitable-rooms",
-    #             "mainheat-description",
-    #             "energy-consumption-current"
-    #         ]
-    #     ].rename(
-    #         columns={
-    #             "inspection-date": "Date of last EPC",
-    #             "current-energy-efficiency": "SAP score on register",
-    #             "current-energy-rating": "EPC rating on register",
-    #             "property-type": "Property Type",
-    #             "built-form": "Archetype",
-    #             "total-floor-area": "Property Floor Area",
-    #             "construction-age-band": "Property Age Band",
-    #             "floor-height": "Property Floor Height",
-    #             "number-habitable-rooms": "Number of Habitable Rooms",
-    #             "walls-description": "Wall Construction",
-    #             "roof-description": "Roof Construction",
-    #             "mainheat-description": "Heating Type",
-    #             "secondheat-description": "Secondary Heating",
-    #             "transaction-type": "Reason for last EPC",
-    #             "energy-consumption-current": "Heat Demand (kWh/m2)",
-    #         }
-    #     ),
-    #     how="left",
-    #     on="row_id"
-    # )
+    )
+
+    # Combine the data:
+
+    stonewater_cavity_properties2 = stonewater_cavity_properties.merge(
+        features[["Address", "Organisation Reference"]], how="left", on="Organisation Reference"
+    )
+    full_dataset = pd.concat([stonewater_cavity_properties2, additional_properties2])
+    full_dataset = full_dataset.drop(columns=['Osm. ID'])
+
+    # We now define the priority list for non-intrusives
+    full_dataset["Postal Region"] = full_dataset["Postcode"].str.split(" ").str[0].str[0:2]
+    full_dataset["Postal Region 2"] = full_dataset["Postcode"].str.split(" ").str[0]
+
+    # Strip out anything we definitely don't want
+    full_dataset = full_dataset[~full_dataset["Installed under ECO3"]]
+
+    areas = full_dataset[full_dataset["Suspected Needs CWI - not surveyed"] == True]["Postal Region 2"].unique()
+
+    priorities = full_dataset[
+        full_dataset["Postal Region 2"].isin(areas)
+    ]
+
+    region_prevalance = priorities["Postal Region 2"].value_counts().to_frame().reset_index()
+    region_prevalance = region_prevalance[region_prevalance["count"] > 100]
+    df = priorities[priorities["Postal Region 2"].isin(region_prevalance["Postal Region 2"].values)]
+
+    df["Postal Region"].value_counts()
+    df["Postal Region 2"].value_counts()
+
+    if df["Installed under ECO3"].sum():
+        raise ValueError("There are properties in the priority list that were installed under ECO3")
+
+    df.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Non-intrusives/10022025 Non-Intrusives - "
+        "revised list.csv",
+        index=False
+    )
 
     # We save the data locally
-    stonewater_cavity_properties.to_csv(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties - priority "
-        "postcodes.csv",
-        index=False
-    )
-    additional_properties2.to_csv(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties - "
-        "non-priority postcodes.csv",
-        index=False
-    )
-    # Save the survey findings
-    needs_cwi.to_csv(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Properties Needing CWI - WIP.csv",
-        index=False
-    )
+    # stonewater_cavity_properties.to_csv(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties - priority "
+    #     "postcodes.csv",
+    #     index=False
+    # )
+    # additional_properties2.to_csv(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties - "
+    #     "non-priority postcodes.csv",
+    #     index=False
+    # )
+    # # Save the survey findings
+    # needs_cwi.to_csv(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Properties Needing CWI - "
+    #     "WIP.csv",
+    #     index=False
+    # )
 
 
 def cross_reference_epc_programme():
@@ -528,6 +449,12 @@ def cross_reference_epc_programme():
         "SURVEYED - ECO3 NOT COMPLETED.xlsx"
     )
 
+    for _, x in eco3_fallout.iterrows():
+        house_no = SearchEpc.get_house_number(x["ADDRESS"], "")
+        if house_no is None:
+            house_no = x["ADDRESS"].split(",")[0]
+        x["house_number"] = house_no
+
     eco3_fallout["house_number"] = eco3_fallout.apply(
         lambda x: SearchEpc.get_house_number(x["ADDRESS"], ""), axis=1
     )
@@ -558,3 +485,58 @@ def cross_reference_epc_programme():
             stonewater_modelled_above_c["Address"].apply(lambda x: fuzz.ratio(x, property["ADDRESS"]) > 90)
         ]
         match.head()
+
+
+def finalise_list_for_non_intrusives():
+    non_intrusives_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Non-intrusives/20250207 Stonewater "
+        "Non-Intrusives.xlsx"
+    )
+
+    # Remove anything installed under ECO3
+    non_intrusives_list = non_intrusives_list[~non_intrusives_list["Installed under ECO3"]]
+
+    # We flag any properties that were surveyed by Osmosis
+    packages = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/Stonewater - Bid Packages WIP 14.11.20 V2 "
+        "(1).xlsx",
+        header=13,
+        sheet_name="Modelled Packages"
+    )
+
+    non_intrusives_list["Surveyed by Osmosis"] = non_intrusives_list["Address ID"].isin(
+        packages["Address ID"].values
+    )
+    # Removed 54 addresses
+    final_non_intrusives = non_intrusives_list[
+        ~non_intrusives_list["Surveyed by Osmosis"]
+    ]
+
+    features = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
+        "master sheet.csv",
+        encoding='latin1'
+    )
+
+    # Add on the organisation reference
+    final_non_intrusives = final_non_intrusives.merge(
+        features[["Organisation Reference", "Address ID"]],
+        how="left",
+        on="Address ID"
+    )
+
+    final_non_intrusives["Postal Region"] = final_non_intrusives["Postcode"].str.split(" ").str[0].str[0:2]
+    selected_regions = final_non_intrusives[
+        final_non_intrusives["Include in non-intrusives"]
+    ]["Postcode"].unique()
+
+    final_non_intrusives["Is in region"] = final_non_intrusives["Postcode"].isin(selected_regions)
+
+    # Filter down:
+    final_non_intrusives = final_non_intrusives[
+        final_non_intrusives["Is in region"]
+    ]
+
+    final_non_intrusives.to_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Non-intrusives/10022025 Non-Intrusives "
+        "List - final.xlsx")
diff --git a/etl/find_my_epc/AssetListEpcData.py b/etl/find_my_epc/AssetListEpcData.py
index bce8cd1f..1d2e1472 100644
--- a/etl/find_my_epc/AssetListEpcData.py
+++ b/etl/find_my_epc/AssetListEpcData.py
@@ -72,12 +72,20 @@ class AssetListEpcData:
             epc_searcher.find_property(skip_os=True)
             if epc_searcher.newest_epc is None:
                 continue
-
-            find_epc_searcher = RetrieveFindMyEpc(
-                address=epc_searcher.newest_epc["address1"],
-                postcode=epc_searcher.newest_epc["postcode"]
-            )
-            find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+            # Try address1 + address2 first; if that raises, fall back to address1 alone:
+            try:
+                find_epc_searcher = RetrieveFindMyEpc(
+                    address=epc_searcher.newest_epc["address1"] + ", " + epc_searcher.newest_epc["address2"],
+                    postcode=epc_searcher.newest_epc["postcode"]
+                )
+                find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
+            except Exception as e:
+                logger.error(f"Error retrieving find my epc data: {e}")
+                find_epc_searcher = RetrieveFindMyEpc(
+                    address=epc_searcher.newest_epc["address1"],
+                    postcode=epc_searcher.newest_epc["postcode"]
+                )
+                find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
             time.sleep(0.5)
             # We need uprn
 
diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py
index f93a5a73..9852cc0d 100644
--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@@ -25,6 +25,7 @@ class RetrieveFindMyEpc:
         self.postcode = postcode
 
         self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower()
+        self.walls = []
 
     @staticmethod
     def extract_low_carbon_sources(soup):
@@ -102,6 +103,8 @@ class RetrieveFindMyEpc:
         # 2) Bills estimates
         # 3) Recommendations and SAP points
         # 4) Low and zero carbon energy sources
+        # 5) The wall types of the property - used for determining if we have an extension wall insulation
+        #    recommendation
 
         ratings = address_res.find('desc', {'id': 'svg-desc'}).text
         current_rating = ratings.split(".")[0]
@@ -208,6 +211,17 @@ class RetrieveFindMyEpc:
             if key not in assessment_data:
                 raise ValueError(f"Missing key: {key}")
 
+        # The wall types of the property
+        property_features_table = address_res.find("tbody", class_="govuk-table__body")
+        property_features_table = property_features_table.find_all("tr")
+
+        # Extract wall types
+        self.walls = []
+        for row in property_features_table:
+            cells = row.find_all("td")
+            if row.find("th").text.strip() == "Wall":
+                self.walls.append(cells[0].text.strip())
+
         # Finally, we format the recommendations
         recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)
 
@@ -229,8 +243,7 @@ class RetrieveFindMyEpc:
 
         return resulting_data
 
-    @staticmethod
-    def format_recommendations(recommendations, assessment_data, sap_2012_date=None):
+    def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None):
         """
         This function converts the recommendations to a format that we can use in the engine as a non-intrusive survey
         :param recommendations: The recommendations from the EPC
@@ -317,7 +330,8 @@ class RetrieveFindMyEpc:
                 "roomstat_programmer_trvs", "time_temperature_zone_control"
             ],
             "Replacement warm air unit": [],
-            "Secondary glazing": ["secondary_glazing"]
+            "Secondary glazing": ["secondary_glazing"],
+            "Condensing heating unit": ["boiler_upgrade"],
         }
 
         survey = True
@@ -330,6 +344,8 @@ class RetrieveFindMyEpc:
         for rec in recommendations:
             mapped = measure_map[rec["measure"]]
             for measure in mapped:
+                if measure == "cavity_wall_insulation" and "solid brick" in self.walls[0].lower():
+                    measure = "extension_cavity_wall_insulation"
                 to_append = {
                     "type": measure,
                     "sap_points": rec["sap_points"],
diff --git a/etl/route_march_data_pull/app.py b/etl/route_march_data_pull/app.py
deleted file mode 100644
index 8d19aa84..00000000
--- a/etl/route_march_data_pull/app.py
+++ /dev/null
@@ -1,396 +0,0 @@
-import os
-import time
-
-import pandas as pd
-import numpy as np
-from tqdm import tqdm
-
-from dotenv import load_dotenv
-from backend.SearchEpc import SearchEpc
-from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
-from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
-
-from recommendations.recommendation_utils import (
-    estimate_perimeter,
-    estimate_external_wall_area,
-    estimate_number_of_floors
-)
-
-load_dotenv(dotenv_path="backend/.env")
-EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
-
-
-def get_data(asset_list, fulladdress_column, address1_column, postcode_column, manual_uprn_map):
-    epc_data = []
-    errors = []
-    no_epc = []
-    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
-        try:
-            postcode = home[postcode_column]
-            house_number = home[address1_column].strip()
-            full_address = home[fulladdress_column].strip()
-            house_no = SearchEpc.get_house_number(address=str(house_number), postcode=postcode)
-            if house_no is None:
-                house_no = house_number
-            uprn = manual_uprn_map.get(full_address, None)
-
-            searcher = SearchEpc(
-                address1=str(house_no),
-                postcode=postcode,
-                auth_token=EPC_AUTH_TOKEN,
-                os_api_key="",
-                property_type=None,
-                fast=True,
-                full_address=full_address,
-                max_retries=5,
-                uprn=uprn
-            )
-            # Force the skipping of estimating the EPC
-            searcher.ordnance_survey_client.property_type = None
-            searcher.ordnance_survey_client.built_form = None
-
-            searcher.find_property(skip_os=True)
-
-            # Check if we have a flat or appartment
-            if searcher.newest_epc is None and uprn is None:
-                # Try again:
-                if SearchEpc.get_house_number(address=str(house_number), postcode=postcode) is None:
-                    # Backup
-                    add1 = full_address.split(",")
-                    if len(add1) > 1:
-                        add1 = add1[1].strip()
-                    else:
-                        # Try splitting on space
-                        add1 = full_address.split(" ")[0].strip()
-
-                else:
-                    add1 = str(house_number)
-                searcher = SearchEpc(
-                    address1=add1,
-                    postcode=postcode,
-                    auth_token=EPC_AUTH_TOKEN,
-                    os_api_key="",
-                    property_type=None,
-                    fast=True,
-                    full_address=full_address,
-                    max_retries=5
-                )
-
-                if (
-                    "flat" in house_number.lower() or "apartment" in house_number.lower() or "apt" in
-                    house_number.lower()
-                ):
-                    searcher.ordnance_survey_client.property_type = "Flat"
-
-                searcher.find_property(skip_os=True)
-
-            if searcher.newest_epc is None:
-                no_epc.append(home["row_id"])
-                continue
-
-            # Look for EPC recommendatons
-            try:
-                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
-            except:
-                property_recommendations = {"rows": []}
-
-            # Retrieve data from FindMyEPC
-            try:
-                find_epc_searcher = RetrieveFindMyEpc(
-                    address=searcher.newest_epc["address"], postcode=searcher.newest_epc["postcode"]
-                )
-                find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
-            except ValueError as e:
-                if "No EPC found" in str(e) and "address1" in searcher.newest_epc:
-                    find_epc_searcher = RetrieveFindMyEpc(
-                        address=searcher.newest_epc["address1"], postcode=searcher.newest_epc["postcode"]
-                    )
-                    find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
-                else:
-                    find_epc_data = {}
-            except Exception as e:
-                raise Exception(f"Error retrieving FindMyEPC data: {e}")
-            time.sleep(np.random.uniform(0.1, 1))
-
-            epc = {
-                "row_id": home["row_id"],
-                **searcher.newest_epc.copy(),
-                "recommendations": property_recommendations["rows"],
-                "find_my_epc_data": find_epc_data,
-            }
-
-            epc_data.append(epc)
-        except Exception as e:
-            errors.append(home["row_id"])
-            time.sleep(5)
-
-    return epc_data, errors, no_epc
-
-
-def extract_address1(asset_list, full_address_col, method="first_two_words"):
-    if method == "first_two_words":
-        asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
-        return asset_list
-
-    if method == "first_word":
-        asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
-        return asset_list
-
-    raise ValueError(f"Method {method} not recognized")
-
-
-def app():
-    """
-    This app is EPC pulling data for some properties owned by Livewest
-
-    Data request contents:
-    Date of last EPC
-    Reason for EPC
-    SAP score on register
-    Property Type
-    Property Area
-    Property Age
-    Any Dimensions (HLP,PW,RH)
-    Property Wall Construction
-    Heating Type
-    Secondary Heating
-    Loft Insulation Depth
-
-    Additional if possible:
-    Heat loss calculations
-    EPC recommendations
-    Property UPRN
-
-    """
-    DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern"
-    DATA_FILENAME = "January 2025 Additions Query.xlsx"
-    SHEET_NAME = "Jan 2025 additions"
-    POSTCODE_COLUMN = "Post Code"
-    FULLADDRESS_COLUMN = "Street / Block Name"
-    ADDRESS1_COLUMN = None
-    ADDRESS1_METHOD = "first_word"
-    ADDRESS_COLS_TO_CONCAT = []
-
-    # Maps addresses to uprn in problematic cases
-    MANUAL_UPRN_MAP = {
-        "Ardelagh Ardelagh Faris Lane Woodham Addlestone KT15 3DJ": 100061484560
-    }
-
-    asset_list = pd.read_excel(os.path.join(DATA_FOLDER, DATA_FILENAME), header=0, sheet_name=SHEET_NAME)
-    asset_list = asset_list[~pd.isnull(asset_list[POSTCODE_COLUMN])].reset_index()
-    asset_list["row_id"] = asset_list.index
-
-    # We clean up portential non-breaking spaces, and double spaces
-    for col in [c for c in [POSTCODE_COLUMN, FULLADDRESS_COLUMN, ADDRESS1_COLUMN] if c is not None]:
-        asset_list[col] = asset_list[col].astype(str)
-        asset_list[col] = asset_list[col].str.replace('\xa0', ' ', regex=False)
-        asset_list[col] = asset_list[col].str.replace('  ', ' ', regex=False)
-
-    if ADDRESS1_COLUMN is None:
-        ADDRESS1_COLUMN = "address1_extracted"
-        asset_list = extract_address1(
-            asset_list=asset_list, full_address_col=FULLADDRESS_COLUMN, method=ADDRESS1_METHOD
-        )
-
-    if FULLADDRESS_COLUMN is None:
-        FULLADDRESS_COLUMN = "fulladdress_extracted"
-        # We concatenate the columns in ADDRESS_COLS_TO_CONCAT, on commas
-        asset_list[FULLADDRESS_COLUMN] = asset_list[ADDRESS_COLS_TO_CONCAT].apply(lambda x: ", ".join(x), axis=1)
-
-    # We check for duplicated addresses
-    asset_list["deduper"] = asset_list[FULLADDRESS_COLUMN] + asset_list[POSTCODE_COLUMN]
-    if asset_list["deduper"].duplicated().sum():
-        # Drop the dupes
-        print(f"There are {asset_list['deduper'].duplicated().sum()} duplicated addresses - dropping")
-        asset_list = asset_list[~asset_list["deduper"].duplicated()]
-    asset_list = asset_list.drop(columns=["deduper"])
-
-    epc_data, errors, no_epc = get_data(
-        asset_list=asset_list,
-        fulladdress_column=FULLADDRESS_COLUMN,
-        address1_column=ADDRESS1_COLUMN,
-        postcode_column=POSTCODE_COLUMN,
-        manual_uprn_map=MANUAL_UPRN_MAP
-    )
-
-    # We now retrieve any failed properties
-    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
-    epc_data_failed, _, _ = get_data(
-        asset_list=asset_list_failed,
-        fulladdress_column=FULLADDRESS_COLUMN,
-        address1_column=ADDRESS1_COLUMN,
-        postcode_column=POSTCODE_COLUMN,
-        manual_uprn_map=MANUAL_UPRN_MAP
-    )
-
-    no_data = asset_list[asset_list["row_id"].isin(no_epc)]
-    print(no_data[[FULLADDRESS_COLUMN, POSTCODE_COLUMN]])
-
-    # Append the failed data to the main data
-    epc_data.extend(epc_data_failed)
-
-    epc_df = pd.DataFrame(epc_data)
-
-    # We expand out the recommendations
-    recommendations_df = epc_df[["row_id", "recommendations"]]
-
-    unique_recommendations = set()
-    for _, row in recommendations_df.iterrows():
-        unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
-
-    columns = ["row_id"] + list(unique_recommendations)
-    transformed_data = []
-    for _, row in recommendations_df.iterrows():
-        # Initialize a dictionary for this row with False for all recommendations
-        row_data = {col: False for col in columns}
-        row_data["row_id"] = row["row_id"]
-
-        # Set True for each recommendation present in this row
-        for rec in row["recommendations"]:
-            recommendation_text = rec["improvement-summary-text"]
-            row_data[recommendation_text] = True
-
-        # Append the row data to transformed_data
-        transformed_data.append(row_data)
-
-    transformed_df = pd.DataFrame(transformed_data)
-    # Drop the column that is ""
-    if "" in transformed_df.columns:
-        transformed_df = transformed_df.drop(columns=[""])
-
-    # Get the find my epc data
-    find_my_epc_data = epc_df[["row_id", "find_my_epc_data"]].drop(columns=["find_my_epc_data"]).join(
-        pd.json_normalize(epc_df["find_my_epc_data"])
-    )
-    # We check if we get the solar pv column:
-    if "Solar photovoltaics" not in find_my_epc_data.columns:
-        find_my_epc_data["Solar photovoltaics"] = False
-
-    # Retrieve just the data we need
-    epc_df = epc_df[
-        [
-            "row_id",
-            "uprn",
-            "address1",
-            "address",
-            "postcode",
-            "property-type",
-            "built-form",
-            "inspection-date",
-            "current-energy-rating",
-            "current-energy-efficiency",
-            "roof-description",
-            "walls-description",
-            "floor-description",
-            "transaction-type",
-            # New fields needed
-            "secondheat-description",
-            "total-floor-area",
-            "construction-age-band",
-            "floor-height",
-            "number-habitable-rooms",
-            "mainheat-description",
-            #
-            "energy-consumption-current",  # kwh/m2
-            "photo-supply",
-        ]
-    ].rename(columns={"address1": "Address1 on EPC", "address": "Address on EPC", "postcode": "Postcode on EPC"})
-
-    asset_list = asset_list.merge(
-        epc_df,
-        how="left",
-        on="row_id"
-    ).merge(
-        find_my_epc_data[
-            [
-                "row_id", "heating_text", "hot_water_text", 'Assessor’s name',
-                "Assessor's Telephone", "Assessor's Email", "Accreditation scheme",
-                "Assessor’s ID", "Solar photovoltaics"
-            ]
-        ].rename(
-            columns={
-                "Solar photovoltaics": "Has Solar PV",
-                "heating_text": "Heating Estimated kWh",
-                "hot_water_text": "Hot Water Estimated kWh",
-            }
-        ),
-        how="left",
-        on="row_id"
-    )
-
-    asset_list["Has Solar PV"] = asset_list["Has Solar PV"] | ~asset_list["photo-supply"].isin(["0.0", 0, None, ""])
-    asset_list = asset_list.drop(columns=["photo-supply"])
-
-    # Rename the columns
-    asset_list = asset_list.rename(columns={
-        "inspection-date": "Date of last EPC",
-        "current-energy-efficiency": "SAP score on register",
-        "current-energy-rating": "EPC rating on register",
-        "property-type": "Property Type",
-        "built-form": "Archetype",
-        "total-floor-area": "Property Floor Area",
-        "construction-age-band": "Property Age Band",
-        "floor-height": "Property Floor Height",
-        "number-habitable-rooms": "Number of Habitable Rooms",
-        "walls-description": "Wall Construction",
-        "roof-description": "Roof Construction",
-        "floor-description": "Floor Construction",
-        "mainheat-description": "Heating Type",
-        "secondheat-description": "Secondary Heating",
-        "transaction-type": "Reason for last EPC",
-        "energy-consumption-current": "Heat Demand (kWh/m2)",
-    })
-
-    asset_list["Estimated Number of Floors"] = asset_list.apply(
-        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
-            x["Property Type"]) else None, axis=1
-    )
-
-    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
-    # Replace "" value with None
-    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", None)
-    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)
-
-    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
-        lambda x: estimate_perimeter(
-            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
-            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
-        ), axis=1
-    )
-
-    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
-        lambda x: estimate_external_wall_area(
-            num_floors=x["Estimated Number of Floors"],
-            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
-            perimeter=x["Estimated Perimeter (m)"],
-            built_form=x["Archetype"]
-        ),
-        axis=1
-    )
-
-    asset_list["Roof Insulation Thickness"] = asset_list.apply(
-        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
-            x["Roof Construction"]) else None,
-        axis=1
-    )
-
-    # For all of the columns in transformed_df, prefix with "Recommendation: "
-    for col in transformed_df.columns:
-        if col == "row_id":
-            continue
-        transformed_df = transformed_df.rename(columns={col: f"Recommendation: {col}"})
-
-    asset_list = asset_list.merge(
-        transformed_df,
-        how="left",
-        on="row_id"
-    )
-    asset_list = asset_list.drop(columns=["row_id", "index"])
-
-    # Store as an excel
-    filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " EPC Data Pull - Main.xlsx"
-    asset_list.to_excel(filename, index=False)
-
-    matches_review = asset_list[
-        [FULLADDRESS_COLUMN, ADDRESS1_COLUMN, POSTCODE_COLUMN, "Address on EPC", "Postcode on EPC"]
-    ]
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index c5c07f89..e4dd3a78 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -852,6 +852,8 @@ class HeatingRecommender:
         else:
             heating_simulation_config["mainheat_energy_eff_ending"] = self.property.data["mainheat-energy-eff"]
 
+        # TODO: We possibly shouldn't touch the hot water energy efficiency if we aren't recommending dual immersion
+        #       we'll keep this for the moment though
         if self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor"]:
             heating_simulation_config["hot_water_energy_eff_ending"] = "Average"
         else:
@@ -993,7 +995,7 @@ class HeatingRecommender:
         # We check if there's a mains connection and the hot water is inefficient, as this will improve with a boiler
         has_inefficient_water = (
             self.property.data["mains-gas-flag"] and
-            self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"]
+            self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor"]
         )
 
         non_invasive_recommendation = next((
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 15614a0b..715332a5 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -503,7 +503,9 @@ class Recommendations:
                         impact_summary.append(
                             {
                                 "phase": rec["phase"],
+                                "representative": rec["recommendation_id"] in representative_ids,
                                 "recommendation_id": rec["recommendation_id"],
+                                "measure_type": rec["measure_type"],
                                 "sap": sap + rec["sap_points"],
                                 "carbon": carbon - rec["co2_equivalent_savings"],
                                 "heat_demand": heat_demand - rec["heat_demand"],
@@ -621,6 +623,13 @@ class Recommendations:
                     if li_sap_limit is not None:
                         property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit)
 
+                if rec["type"] == "solar_pv":
+                    # We use the SAP points in the recommendation as a minimum
+                    property_phase_impact["sap"] = (
+                        rec["sap_points"] if property_phase_impact["sap"] < rec["sap_points"] else
+                        property_phase_impact["sap"]
+                    )
+
                 # Insert this information into the recommendation.
                 if not rec.get("survey", False):
                     rec["sap_points"] = property_phase_impact["sap"]
@@ -647,7 +656,9 @@ class Recommendations:
         return property_recommendations, impact_summary
 
     @staticmethod
-    def map_descriptions_to_fuel(heating_description, hotwater_description, main_fuel_description):
+    def map_descriptions_to_fuel(
+        heating_description, hotwater_description, main_fuel_description, descriptions_to_fuel_types
+    ):
 
         # Handle the case of community schemes
         if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"):
@@ -660,7 +671,7 @@ class Recommendations:
                 }
             raise NotImplementedError("Handle this case")
 
-        mapped = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
+        mapped = descriptions_to_fuel_types[heating_description]
         heating_fuel = mapped["fuel"]
 
         if hotwater_description in [
@@ -680,7 +691,7 @@ class Recommendations:
                 "heating_cop": mapped["cop"], "hotwater_cop": 1
             }
 
-        mapped_hotwater = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[hotwater_description]
+        mapped_hotwater = descriptions_to_fuel_types[hotwater_description]
 
         return {
             "heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"],
@@ -689,7 +700,7 @@ class Recommendations:
 
     @classmethod
     def calculate_recommendation_tenant_savings(
-        cls, property_instance, kwh_simulation_predictions, property_recommendations
+        cls, property_instance, kwh_simulation_predictions, property_recommendations, ashp_cop=None
     ):
         """
         This method inserts the kwh savings and the bill savings that the customer will make from the recommendations
@@ -701,9 +712,12 @@ class Recommendations:
         :param property_instance: Instance of the Property class, for the home associated to property_id
         :param kwh_simulation_predictions: dictionary of predictions from the model apis
         :param property_recommendations: dictionary of recommendations for the property
+        :param ashp_cop: The coefficient of performance for the air source heat pump.
         :return:
         """
 
+        ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY
+
         kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][
             kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id)
             ].merge(
@@ -772,12 +786,19 @@ class Recommendations:
                     if kwh_impact_table.loc[i, col] > previous_phase[col].max():
                         kwh_impact_table.loc[i, col] = previous_phase[col].max()
 
+        descriptions_to_fuel_types = assumptions.DESCRIPTIONS_TO_FUEL_TYPES
+        # We overwrite the air source heat pump COPs (note: this mutates assumptions.DESCRIPTIONS_TO_FUEL_TYPES in place)
+        ashp_keys = [k for k in descriptions_to_fuel_types.keys() if "air source heat pump" in k.lower()]
+        for k in ashp_keys:
+            descriptions_to_fuel_types[k]["cop"] = ashp_cop
+
         # For heating system recommendations, this could result in a fuel type change so we reflect that
         fuel_mapping = pd.DataFrame([
             {
                 "id": epc["id"],
                 **cls.map_descriptions_to_fuel(
-                    epc["mainheat-description"], epc["hotwater-description"], epc["main-fuel"]
+                    epc["mainheat-description"], epc["hotwater-description"], epc["main-fuel"],
+                    descriptions_to_fuel_types
                 )
             } for epc in property_instance.updated_simulation_epcs
         ])
@@ -791,7 +812,8 @@ class Recommendations:
                             **cls.map_descriptions_to_fuel(
                                 property_instance.data["mainheat-description"],
                                 property_instance.data["hotwater-description"],
-                                property_instance.data["main-fuel"]
+                                property_instance.data["main-fuel"],
+                                descriptions_to_fuel_types
                             )
                         }
                     ]
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 95f189d3..a97dbcb3 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -14,11 +14,16 @@ class SolarPvRecommendations:
     # This was previously set to 250w, but has been upped to 400 based on the systems used by Cotswolrd Energy Group
     SOLAR_PANEL_WATTAGE = 400
 
+    # For domestic properties, we don't recommend a solar PV system with wattage outside of these
+    # bounds
     MAX_SYSTEM_WATTAGE = 6000
     MIN_SYSTEM_WATTAGE = 1000
 
+    # The maximum area of roof we allow to be covered in solar panels for our recommendations.
     MAX_ROOF_AREA_PERCENTAGE = 0.7
 
+    SAP_POINTS_PER_5_PERCENT_ROOF_COVERAGE = 1
+
     def __init__(self, property_instance):
         """
         :param property_instance: Instance of the Property class, for the home associated to property_id
@@ -212,6 +217,20 @@ class SolarPvRecommendations:
             roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100)
             # We round up to the nearest 5
             roof_coverage_percent = np.ceil(roof_coverage_percent / 5) * 5
+
+            # Typically, we've observed that every 5% of additional roof coverage will result in at least
+            # 1 additional SAP point (though often 2 points). Given this, we can add a reasonable minimum
+            # for the number of SAP points we might expect. We've observed that for some cases where properties
+            # are hitting the higher SAP scores (e.g. EPC A and above), the model can sometimes under-predict
+            # the number of SAP points. This appears to be due to a relatively small number of properties
+            # actually achieving the upper echelons of EPC rating. This can be the case if we're simulating a
+            # whole house retrofit where the home is getting complete insulation, a heat pump and solar panels.
+            # Because panels are the final recommendation, they are often the measure that takes the home
+            # into the medium to high EPC A ranges and so because of a lack of training data, this means that
+            # we might sometimes under-predict. This minimum is intended to try and reduce the negative impact
+            # of this. This minimum is used in Recommendations.calculate_recommendation_impact
+            minimum_sap_points = (roof_coverage_percent / 5) * self.SAP_POINTS_PER_5_PERCENT_ROOF_COVERAGE
+
             for has_battery in [False, True]:
                 cost_result = self.costs.solar_pv(
                     has_battery=has_battery,
@@ -240,7 +259,7 @@ class SolarPvRecommendations:
                         "description": description,
                         "starting_u_value": None,
                         "new_u_value": None,
-                        "sap_points": None,
+                        "sap_points": minimum_sap_points,
                         "already_installed": already_installed,
                         **cost_result,
                         # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index 1f755369..46e56c93 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -215,21 +215,29 @@ class WindowsRecommendations:
             "glazed-type": glazed_type_ending,
         }
 
+        measure_type = "double_glazing" if not is_secondary_glazing else "secondary_glazing"
+
+        non_invasive_recommendation = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] in ["windows_glazing", measure_type]),
+            {}
+        )
+
         self.recommendation = [
             {
                 "phase": phase,
                 "parts": [],
                 "type": "windows_glazing",
-                "measure_type": "double_glazing" if not is_secondary_glazing else "secondary_glazing",
+                "measure_type": measure_type,
                 "description": description,
                 "starting_u_value": None,
                 "new_u_value": None,
-                "sap_points": None,
+                "sap_points": non_invasive_recommendation.get("sap_points", None),
                 "already_installed": already_installed,
                 **cost_result,
                 "is_secondary_glazing": is_secondary_glazing,
                 "description_simulation": description_simulation,
                 "simulation_config": simulation_config,
+                "survey": non_invasive_recommendation.get("survey", None),
             }
         ]
 
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index 00da6107..602684cf 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -205,7 +205,7 @@ def get_wall_u_value(
 
         mapped_value = wall_uvalues_df[
             wall_uvalues_df["Wall_type"] == mapped_description
-        ][age_band].values[0]
+            ][age_band].values[0]
 
         if pd.isnull(mapped_value) and "Park home" in mapped_description:
             # We don't know enough in this case so we default to 0
@@ -428,6 +428,9 @@ def estimate_number_of_floors(property_type):
     Using the property type, we estimate the number of floors in the property
     """
 
+    if property_type is None:
+        return None
+
     if property_type == "House":
         number_of_floors = 2
     elif property_type in ["Flat", "Bungalow"]:
@@ -560,7 +563,7 @@ def get_floor_u_value(
         insulation_lookup = s11[
             s11["Age_band"].str.contains(age_band) & s11["Floor_construction"]
             == floor_type
-        ]
+            ]
         if insulation_lookup.empty:
             insulation_thickness = 0
         else:
diff --git a/survey_report/app.py b/survey_report/app.py
new file mode 100644
index 00000000..f6eddb8d
--- /dev/null
+++ b/survey_report/app.py
@@ -0,0 +1,270 @@
+import os
+import requests
+import PyPDF2
+from string import Template
+
+import pandas as pd
+
+from survey_report.extraction.detect_report_type import detect_report_type
+from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor
+
+
+def generate_html_report(template_path, output_path, data):
+    """
+    Render an HTML report by filling a string.Template file with values and writing the result to disk.
+
+    Args:
+    - template_path (str): Path of the HTML template, read as UTF-8, containing $placeholder fields.
+    - output_path (str): Path the rendered HTML is written to (opened in "w" mode, so it is overwritten).
+    - data (dict): Mapping of placeholder name to the value substituted for it.
+    """
+    # Load the raw template text
+    with open(template_path, "r", encoding="utf-8") as template_file:
+        raw_template = template_file.read()
+
+    # safe_substitute leaves placeholders that are missing from data in place rather than raising KeyError
+    rendered = Template(raw_template).safe_substitute(data)
+
+    # Persist the rendered report
+    with open(output_path, "w", encoding="utf-8") as report_file:
+        report_file.write(rendered)
+
+    print(f"HTML report generated successfully: {output_path}")
+
+
+def stringify_number(num: int, rounding: bool = True) -> str:
+    """Format num for display: comma-grouped below 100,000, otherwise whole thousands with a "k" suffix."""
+    if rounding and num < 100000:
+        num = ((num + 99) // 100) * 100  # round up to the next 100; 99,901+ becomes 100,000 -> "100k"
+    if num < 100000:  # 5 figures or fewer
+        return f"{num:,}"
+    return f"{(num + 999) // 1000 if rounding else num // 1000}k"  # more than 5 figures, rounded up to 1,000s
+
+
+class PlacidApi:
+    # Minimal Placid REST client. Errors as defined by docs: https://placid.app/docs/2.0/rest/errors
+    ERROR_CODES = {
+        400: "Bad request",
+        401: "Unauthorized",
+        404: "Template Not found",
+        422: "Validation error",
+        429: "Rate limit exceeded",
+        500: "Internal server error",
+    }
+
+    def __init__(self, api_key):
+        self.api_key = api_key
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+
+    def create_pdf(
+        self,
+        template_uuid: str,
+        current_epc_rating: str,
+        current_epc_rating_colour: str,
+        post_retrofit_epc_rating: str,
+        post_retrofit_epc_rating_colour: str,
+    ):
+        """Queue a one-page PDF render of the template and return the decoded JSON response (includes "id")."""
+        url = "https://api.placid.app/api/rest/pdfs"
+        body = {
+            "webhook_success": None,
+            "passthrough": None,
+            "pages": [
+                {
+                    "template_uuid": template_uuid,
+                    "layers": {
+                        "current_epc_rating": {
+                            "text": current_epc_rating,
+                            "text_color": current_epc_rating_colour,
+                        },
+                        "post_retrofit_epc_rating": {
+                            "text": post_retrofit_epc_rating,
+                            "text_color": post_retrofit_epc_rating_colour,
+                        }
+                    },
+                },
+            ]
+        }
+        response = requests.post(url, headers=self.headers, json=body)
+        response.raise_for_status()  # surface HTTP errors here instead of a later KeyError on the body
+        return response.json()
+
+    def get_pdf(self, pdf_id: str, poll_interval: float = 5, max_attempts: int = 60):
+        """
+        Poll the API every poll_interval seconds (at most max_attempts times) until the PDF is ready, then
+        save it to survey_report/example_data/output.pdf.
+        :param pdf_id: The "id" value from the create_pdf response.
+        :raises RuntimeError: Placid reported an error status. :raises TimeoutError: Never became ready.
+        """
+        import time  # function-scope import so this method does not rely on the file's import section
+
+        url = f"https://api.placid.app/api/rest/pdfs/{pdf_id}"
+        for _ in range(max_attempts):
+            response = requests.get(url, headers=self.headers)
+            response.raise_for_status()
+            response_body = response.json()
+            if response_body.get("pdf_url"):  # pdf_url is None while the render is still queued
+                break
+            if response_body.get("status") == "error":
+                raise RuntimeError(f"Placid failed to render PDF {pdf_id}: {response_body}")
+            time.sleep(poll_interval)
+        else:
+            raise TimeoutError(f"PDF {pdf_id} was not ready after {max_attempts} status checks")
+
+        pdf_download = requests.get(response_body["pdf_url"])
+        with open("survey_report/example_data/output.pdf", "wb") as f:
+            f.write(pdf_download.content)
+
+
+def handler():
+    """
+    Extracts the data from the Quidos PDFs for each property and produces the survey report outputs
+    :return: None. The Placid key is read from the PLACID_API_KEY environment variable (KeyError if unset)
+    """
+
+    PLACID_API_KEY = os.environ["PLACID_API_KEY"]  # read from the environment; never commit credentials
+    TEMPLATE_UUID = "5bst9mh1q9lk9"
+    placid_api = PlacidApi(PLACID_API_KEY)
+
+    current_property_value = 250000  # Needs to be an input
+
+    EPC_COLOURS = {
+        "A": "#117d58",
+        "B": "#2da55c",
+        "C": "#8dbd40",
+        "D": "#f7cd14",
+        "E": "#f3a96a",
+        "F": "#ef8026",
+        "G": "#e41e3b",
+    }
+
+    folders = [
+        {
+            "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 "
+                          "WILLIS ROAD FLAT 1 PRE EPR SITE NOTES.pdf",
+            "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1/3 WILLIS "
+                   "ROAD FLAT 1 PRE EPR PDF.pdf",
+            "scenario_site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data"
+                                   "/Flat 1/3 WILLIS ROAD FLAT 1 POST EPR SITE NOTES.pdf"
+        },
+        {
+            "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 "
+                          "WILLIS ROAD FLAT 2 PRE EPR SITE NOTES.pdf",
+            "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2/3 WILLIS "
+                   "ROAD FLAT 2 PRE EPR PDF.pdf",
+            "scenario_site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data"
+                                   "/Flat 2/3 WILLIS ROAD FLAT 2 POST EPR SITE NOTES.pdf"
+        },
+        {
+            "site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 "
+                          "WILLIS ROAD FLAT 3 PRE EPR SITE NOTES.pdf",
+            "epr": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3/3 WILLIS "
+                   "ROAD FLAT 3 PRE EPR PDF.pdf",
+            "scenario_site_notes": "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data"
+                                   "/Flat 3/3 WILLIS ROAD FLAT 3 POST EPR SITE NOTES.pdf"
+        },
+    ]
+
+    data = []
+    for data_config in folders:
+
+        file_mapping = {}
+        for filename, filepath in data_config.items():
+            with open(filepath, "rb") as f:
+                pdf = PyPDF2.PdfReader(f)
+                first_page = pdf.pages[0].extract_text()
+                text = ""
+                for page in pdf.pages:
+                    text += page.extract_text()
+
+            # Only keep files whose first page matches a known report type
+            report_type = detect_report_type(first_page)
+            if report_type is not None:
+                file_mapping[filename] = text
+
+        # This is only set up to work with quido site notes so we must have it
+        site_notes_extractor = SiteNotesExtractor(file_mapping["site_notes"])
+        site_notes = site_notes_extractor.extract_all()
+
+        # We also must have an EPR
+        epr_extractor = EPRExtractor(file_mapping["epr"])
+        epr = epr_extractor.extract_all()
+
+        # Valuation simulation
+        scenario_site_notes_extractor = SiteNotesExtractor(file_mapping["scenario_site_notes"])
+        scenario_site_notes = scenario_site_notes_extractor.extract_all()
+
+        from backend.ml_models.Valuation import PropertyValuation
+        valuation_uplift = PropertyValuation.estimate_valuation_improvement(
+            current_value=current_property_value,
+            current_epc=site_notes["Current EPC Band"],
+            target_epc=scenario_site_notes["Current EPC Band"],
+        )
+        # TODO - should convert this, when it's more than 5 figures and we should certainly stringify this
+
+        valuation_difference = round(valuation_uplift["average_increased_value"] - current_property_value)
+
+        # Prepare the data for output
+        bill_savings = round(
+            site_notes['Estimated Annual Energy Cost (£)'] - scenario_site_notes['Estimated Annual Energy Cost (£)']
+        )
+
+        carbon_savings = round(
+            site_notes["Current Carbon Emissions (TCO2)"] - scenario_site_notes["Current Carbon Emissions (TCO2)"],
+            2
+        )
+
+        payback_period = None
+        if payback_period is None:
+            raise NotImplementedError("Implement me")
+
+        # We extract the measures from the site notes
+
+        report_data = {
+            "current_epc_rating": site_notes["Current EPC Band"],
+            "current_epc_rating_colour": EPC_COLOURS[site_notes["Current EPC Band"]],
+            "post_retrofit_epc_rating": scenario_site_notes["Current EPC Band"],
+            "post_retrofit_epc_rating_colour": EPC_COLOURS[scenario_site_notes["Current EPC Band"]],
+            "bill_savings": stringify_number(bill_savings),
+            "valuation_improvement": stringify_number(valuation_difference),
+            "carbon_savings": carbon_savings,
+
+        }
+
+        # We now produce the combined data sheet which is the starting figure:
+        # data_sheet = {**epr, **site_notes}
+        # del data_sheet['Building Dimensions']
+        # # We unnest the Total Building Dimensions
+        # data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"]
+        # data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"]
+        # del data_sheet["Total Building Dimensions"]
+
+        create_pdf_response = placid_api.create_pdf(
+            template_uuid=TEMPLATE_UUID, **report_data
+        )
+        # {'id': 769832, 'type': 'pdf', 'status': 'queued', 'pdf_url': None, 'transfer_url': None, 'passthrough': None}
+        # Download locally
+        placid_api.get_pdf(create_pdf_response["id"])
+
+    data = pd.DataFrame(data)
+
+    # Generate the HTML report
+    # Placeholder locations. NOTE(review): data_sheet is only assigned in the commented-out block above
+    template_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/template.html"
+    output_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/output/report.html"
+    logo_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/assets/logo.png"
+    generate_html_report(
+        template_path, output_path,
+        data={
+            "address": data_sheet["Address"],
+            "logo_path": logo_path,
+            "current_epc": data_sheet["Current EPC Band"],
+            "current_sap": data_sheet["Current SAP Rating"],
+            "potential_epc": "A",  # TODO PLACEHOLDER
+            "potential_sap": 91,  # TODO PLACEHOLDER
+        }
+    )
diff --git a/survey_report/extraction/detect_report_type.py b/survey_report/extraction/detect_report_type.py
new file mode 100644
index 00000000..434a3fb4
--- /dev/null
+++ b/survey_report/extraction/detect_report_type.py
@@ -0,0 +1,22 @@
+import re
+
+
+def detect_report_type(first_page):
+    """
+    Classifies a report from the text of its first page
+    :param first_page: Text extracted from the first page of the PDF
+    :return: "quidos_site_notes", "quidos_epr", or None when neither signature is found
+    """
+    # Only Quidos documents are recognised for the minute. We have the Elmhurst logic so we can
+    # introduce it here when we need it
+
+    site_notes_banner = r"^Created \d{2}/\d{2}/\d{4} for Quidos Ltd using Argyle software BRE approved calculator"
+    epr_banner = r"\nIQ-Energy\nEnergy Performance Report\nPage 1 of 1"
+
+    # The site notes banner must open the page (re.match); the EPR banner may appear anywhere (re.search)
+    if re.match(site_notes_banner, first_page) is not None:
+        return "quidos_site_notes"
+    if re.search(epr_banner, first_page) is not None:
+        return "quidos_epr"
+
+    return None
diff --git a/survey_report/extraction/quidos.py b/survey_report/extraction/quidos.py
new file mode 100644
index 00000000..2e772886
--- /dev/null
+++ b/survey_report/extraction/quidos.py
@@ -0,0 +1,256 @@
+import re
+
+
+class SiteNotesExtractor:
+    """
+    Parses the text of an EPC summary report into a dictionary: SAP ratings, carbon emissions,
+    estimated energy costs and building dimensions, with wall details available on request.
+    """
+
+    def __init__(self, pdf_text):
+        """
+        Keeps the extracted PDF text and starts with an empty results dictionary.
+        """
+        self.text = pdf_text
+        self.data = {}
+
+    def extract_sap_rating(self):
+        """
+        Stores the current and potential EPC band and SAP rating in self.data.
+        Raises ValueError when the report has no SAP rating line.
+        """
+        found = re.search(
+            r"Current SAP rating\s*([A-G])\s*(\d+)\s*Potential SAP rating\s*([A-G])\s*(\d+)",
+            self.text,
+        )
+        if found is None:
+            raise ValueError("No SAP rating found in the report")
+
+        band_now, sap_now, band_potential, sap_potential = found.groups()
+        self.data.update({
+            "Current EPC Band": band_now,
+            "Current SAP Rating": int(sap_now),
+            "Potential EPC Band": band_potential,
+            "Potential SAP Rating": int(sap_potential),
+        })
+
+    def extract_carbon_emissions(self):
+        """
+        Stores the current annual carbon emissions (TCO2) in self.data.
+        Raises ValueError when the figure is missing from the report.
+        """
+        found = re.search(r"Current annual emissions\s*([\d.]+)\s*\(TCO2\)", self.text)
+        if found is None:
+            raise ValueError("No carbon emissions found in the report")
+
+        self.data["Current Carbon Emissions (TCO2)"] = float(found.group(1))
+
+    def extract_building_dimensions(self):
+        """
+        Stores the dimensions of every building part (Main Property and any extensions) in
+        self.data as a list, together with the floor area and heat loss area totals.
+        Raises ValueError when the dimensions table or its rows cannot be found.
+        """
+        # The table sits between its column header row and the "5.0 Conservatory" heading
+        section = re.search(
+            r"Dimension Type (?:internal|external)\nPart Floor Area \(m2\) Room Height \(m\) Loss Perimeter \(m\) "
+            r"Party Wall "
+            r"Length \(m\)\n"
+            r"(.*?)\n5\.0 Conservatory", self.text, re.DOTALL
+        )
+        if section is None:
+            raise ValueError("Failed to locate the dimensions section in the text.")
+
+        # One row per building part: its name followed by four numeric columns
+        row_pattern = re.compile(
+            r"(Main Property|Extension \d+)\s*(?:Property)?\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
+        )
+
+        parts = []
+        for row in row_pattern.finditer(section.group(1)):
+            name = row.group(1).strip()
+            floor_area, room_height, loss_perimeter, party_wall = (float(cell) for cell in row.groups()[1:])
+            parts.append({
+                "Building Part": name,
+                "Part Floor Area (m2)": floor_area,
+                "Room Height (m)": room_height,
+                "Loss Perimeter (m)": loss_perimeter,
+                "Party Wall Length (m)": party_wall,
+                # Heat loss area is derived: loss perimeter times room height
+                "Heat Loss Area (m2)": loss_perimeter * room_height,
+            })
+
+        if not parts:
+            raise ValueError("No building dimensions found in the report")
+
+        self.data["Building Dimensions"] = parts
+        self.data["Total Building Dimensions"] = {
+            "floor_area": sum(part["Part Floor Area (m2)"] for part in parts),
+            "heat_loss_area": sum(part["Heat Loss Area (m2)"] for part in parts),
+        }
+
+    def extract_bills_estimate(self):
+        """
+        Stores the estimated annual energy cost (£) in self.data.
+        Raises ValueError when the cost line is missing from the report.
+        """
+        found = re.search(r"Current annual energy costs £\s*([\d,.]+)", self.text)
+        if found is None:
+            raise ValueError("No bills estimate found in the report")
+
+        # Thousands separators have to go before the figure can be parsed
+        amount = found.group(1).replace(",", "")
+        self.data["Estimated Annual Energy Cost (£)"] = float(amount)
+
+    def extract_all(self):
+        """
+        Runs the SAP rating, carbon emissions, bills and dimensions extractions, in that order,
+        and returns the dictionary of extracted data. Wall extraction is not part of this run.
+        """
+        steps = (
+            self.extract_sap_rating,
+            self.extract_carbon_emissions,
+            self.extract_bills_estimate,
+            self.extract_building_dimensions,
+        )
+        for step in steps:
+            step()
+
+        # Specific measures still to extract: primary wall, secondary wall, roof, floor, heating
+        # system, hot water system, windows, doors, lighting, ventilation, solar
+
+        return self.data
+
+    def extract_walls(self):
+        """
+        Builds one dictionary per building part listed in the 7.0 Walls section of the summary PDF
+        text: wall type, insulation, insulation thickness, whether the wall thickness was measured,
+        and the wall thickness. The alternative wall fields are present but always None for now.
+        The list is returned to the caller; self.data is not modified.
+        Raises ValueError when the walls section cannot be located.
+        """
+        # Isolate the 7.0 Walls section: everything up to the 8.0 Roofs heading
+        wall_section_match = re.search(r"7\.0 Walls\n(.*?)\n8\.0 Roofs", self.text, re.DOTALL)
+        if wall_section_match is None:
+            raise ValueError("Failed to locate the walls section in the text.")
+        wall_section = wall_section_match.group(1)
+
+        # A building part heading followed by its detail lines; every detail line is optional
+        wall_pattern = re.compile(
+            r"(?P<section>Main Property(?: Alternative)?|Extension \d+)\s*\n"
+            r"(?:Construction\s*(?P<construction>[^\n]*)\n)?"
+            r"(?:Insulation\s*(?P<insulation>[^\n]*)\n)?"
+            r"(?:Insulation Thickness\(mm\)\s*(?P<insulation_thickness>[^\n]*)\n)?"
+            r"(?:Wall Thickness Measured\?\s*(?P<thickness_measured>[^\n]*)\n)?"
+            r"(?:Wall Thickness\(mm\)\s*(?P<thickness>\d+))?",
+            re.MULTILINE
+        )
+
+        # TODO: We aren't effectively picking up alternative walls
+        # (the drafted second pass below is kept for reference and is not executed)
+        # alt_wall_pattern = re.compile(
+        #     r"Alternative Wall Sheltered\s*.*?\n"
+        #     r".*?Construction\s*(?P<alt_construction>[^\n]*)\n"
+        #     r"Insulation\s*(?P<alt_insulation>[^\n]*)\n"
+        #     r"Insulation Thickness\(mm\)\s*(?P<alt_insulation_thickness>[^\n]*)\n"
+        #     r"Wall Thickness Measured\?\s*(?P<alt_thickness_measured>[^\n]*)\n"
+        #     r"Wall Thickness\(mm\)\s*(?P<alt_thickness>\d+)?",
+        #     re.MULTILINE
+        # )
+
+        wall_data = []
+        for match in wall_pattern.finditer(wall_section):
+            building_part = match.group("section")
+            # has_alternative_wall = "Alternative" in building_part
+            # "Main Property Alternative" headings are reported as plain "Main Property"
+            if "Main Property" in building_part:
+                building_part = "Main Property"
+
+            # The pattern only captures digits here, so a captured thickness always converts
+            thickness = match.group("thickness")
+            wall_entry = {
+                "Building Part": building_part,
+                "Wall Type": match.group("construction") or "Unknown",
+                "Wall Insulation": match.group("insulation") or "Unknown",
+                "Insulation Thickness (mm)": match.group("insulation_thickness") or "Unknown",
+                "Wall Thickness Measured": match.group("thickness_measured") or "Unknown",
+                "Wall Thickness (mm)": int(thickness) if thickness else None,
+                "Alternative Wall Type": None,
+                "Alternative Wall Insulation": None,
+                "Alternative Insulation Thickness (mm)": None,
+                "Alternative Wall Thickness Measured": None,
+                "Alternative Wall Thickness (mm)": None,
+            }
+
+            # Check if an alternative wall section exists
+            # if has_alternative_wall:
+            #     alt_match = alt_wall_pattern.search(wall_section, match.end())
+            #     if alt_match:
+            #         wall_entry["Alternative Wall Type"] = alt_match.group("alt_construction") or "Unknown"
+            #         wall_entry["Alternative Wall Insulation"] = alt_match.group("alt_insulation") or "Unknown"
+            #         wall_entry["Alternative Insulation Thickness (mm)"] = alt_match.group(
+            #             "alt_insulation_thickness") or "Unknown"
+            #         wall_entry["Alternative Wall Thickness Measured"] = alt_match.group(
+            #             "alt_thickness_measured") or "Unknown"
+            #         wall_entry["Alternative Wall Thickness (mm)"] = int(
+            #             alt_match.group("alt_thickness")) if alt_match.group("alt_thickness") and alt_match.group(
+            #             "alt_thickness").isdigit() else None
+
+            wall_data.append(wall_entry)
+
+        return wall_data
+
+
+class EPRExtractor:
+    """
+    Parses the text of an Energy Performance Report (EPR) for the property address and the
+    space and water heating consumption figures.
+    """
+
+    def __init__(self, pdf_text):
+        """
+        Keeps the extracted PDF text and starts with an empty results dictionary.
+        """
+        self.text = pdf_text
+        self.data = {}
+
+    def extract_heating_consumption(self):
+        """
+        Stores the space heating and water heating figures (KWH) in self.data as integers.
+        Raises ValueError when the two figures cannot be found.
+        """
+        found = re.search(
+            r"Space Heating\(KWH\)\s*([\d,]+).*?\nWater Heating\(KWH\)\s*([\d,]+)",
+            self.text,
+            re.DOTALL
+        )
+        if found is None:
+            raise ValueError("No heating data found in the report")
+
+        # Thousands separators are dropped before the figures are converted
+        space_heating, water_heating = (int(figure.replace(",", "")) for figure in found.groups())
+        self.data["Space Heating (KWH)"] = space_heating
+        self.data["Water Heating (KWH)"] = water_heating
+
+    def extract_address(self):
+        """
+        Stores the text between the "Address" label and the "Town" line in self.data["Address"].
+        The town itself is matched but not included. Raises ValueError when no address is found.
+        """
+        found = re.search(
+            r"Address\s*(.*?)\nTown\s*(.*?)\n",
+            self.text,
+            re.DOTALL
+        )
+        if found is None:
+            raise ValueError("No address found in the report")
+
+        self.data["Address"] = found.group(1).strip()
+
+    def extract_all(self):
+        """
+        Runs the address and heating extractions, in that order, and returns the extracted data.
+        """
+        self.extract_address()
+        self.extract_heating_consumption()
+        return self.data
diff --git a/etl/route_march_data_pull/requirements.txt b/survey_report/requirements.txt
similarity index 100%
rename from etl/route_march_data_pull/requirements.txt
rename to survey_report/requirements.txt
diff --git a/survey_report/template.html b/survey_report/template.html
new file mode 100644
index 00000000..5d3b6c63
--- /dev/null
+++ b/survey_report/template.html
@@ -0,0 +1,123 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Domna Energy Report</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            background-color: #ffffff;
+            color: #333;
+            margin: 0;
+            padding: 0;
+            display: flex; /* with justify-content, centres .container horizontally */
+            justify-content: center;
+        }
+        .container {
+            width: 100%;
+            max-width: 1300px;
+            margin: 20px auto;
+        }
+        .header {
+            background-color: #1B1F3B;
+            color: white;
+            padding: 30px;
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            border-radius: 12px;
+        }
+        .header h1 {
+            margin: 5px; /* a non-zero length needs a unit, otherwise the declaration is dropped */
+            font-size: 24px;
+        }
+        .header p {
+            margin: 5px 0 0;
+            font-size: 16px;
+            color: #d1d5db;
+        }
+        .logo img {
+            height: 60px;
+        }
+
+        /* EPC Rating Cards */
+        .epc-container {
+            display: flex;
+            justify-content: space-between;
+            gap: 20px;
+            margin-top: 30px;
+        }
+        .epc-card {
+            background-color: white;
+            border: 2px solid #ccc;
+            border-radius: 10px;
+            padding: 20px;
+            flex: 1;
+            display: flex;
+            flex-direction: column;
+            justify-content: space-between; /* Spreads title and rating; SAP is positioned separately */
+            align-items: center;
+            text-align: center;
+            box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
+            position: relative;
+            height: 160px;
+        }
+        .epc-title {
+            font-size: 18px;
+            font-weight: bold;
+            color: #666;
+        }
+        .epc-rating {
+            font-size: 50px;
+            font-weight: bold;
+        }
+        .sap-rating {
+            font-size: 18px;
+            color: #555;
+            position: absolute; /* anchored to the card, which is position: relative */
+            bottom: 10px;
+            right: 20px;
+        }
+        .before .epc-rating {
+            color: #1B1F3B; /* Dark navy, same as the header background */
+        }
+        .after .epc-rating {
+            color: #D4AF37; /* Gold */
+        }
+
+    </style>
+</head>
+<body>
+    <!-- Placeholder values are filled in when the report is generated from this template -->
+    <div class="container">
+        <!-- Header Section -->
+        <div class="header">
+            <div>
+                <h1>Domna Energy Report</h1>
+                <p>${address}</p> <!-- Address Placeholder -->
+            </div>
+            <div class="logo">
+                <img src="${logo_path}" alt="Domna Logo">
+            </div>
+        </div>
+
+        <!-- EPC Rating Cards -->
+        <div class="epc-container">
+            <div class="epc-card before">
+                <div class="epc-title">Current EPC Rating</div>
+                <div class="epc-rating">${current_epc}</div>
+                <div class="sap-rating">SAP ${current_sap}</div>
+            </div>
+
+            <div class="epc-card after">
+                <div class="epc-title">Potential EPC Rating</div>
+                <div class="epc-rating">${potential_epc}</div>
+                <div class="sap-rating">SAP ${potential_sap}</div>
+            </div>
+        </div>
+
+    </div>
+
+</body>
+</html>