diff --git a/.idea/.gitignore b/.idea/.gitignore
index 26d33521..8f00030d 100644
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -1,3 +1,5 @@
 # Default ignored files
 /shelf/
 /workspace.xml
+# GitHub Copilot persisted chat sessions
+/copilot/chatSessions
diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index 4a55e504..f86e33dc 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -147,7 +147,8 @@ class Property:
         # self.base_difference_record.df
 
     def adjust_difference_record_with_recommendations(
-        self, property_recommendations,
+        self,
+        property_recommendations,
         property_representative_recommendations
     ):
         """
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 4f6fd33d..cc2ee4a9 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -30,7 +30,7 @@ vartypes = {
     'environment-impact-potential': "Int64",
     'glazed-type': 'str',
     'heating-cost-current': 'float',
-    'address3': 'str',
+    # 'address3': 'str',
     'mainheatcont-description': 'str',
     'sheating-energy-eff': 'str',
     'property-type': 'str',
@@ -40,7 +40,7 @@ vartypes = {
     'mechanical-ventilation': 'str',
     'hot-water-cost-current': 'str',
     'county': 'str',
-    'postcode': 'str',
+    # 'postcode': 'str',
     'solar-water-heating-flag': 'str',
     'constituency': 'str',
     'co2-emissions-potential': 'float',
@@ -55,7 +55,7 @@ vartypes = {
     # 'inspection-date': str,
     'mains-gas-flag': 'str',
     'co2-emiss-curr-per-floor-area': 'float',
-    'address1': 'str',
+    # 'address1': 'str',
     'heat-loss-corridor': 'str',
     'flat-storey-count': "Int64",
     'constituency-label': 'str',
@@ -67,7 +67,7 @@ vartypes = {
     'roof-description': 'str',
     'floor-energy-eff': 'str',
     'number-habitable-rooms': 'float',
-    'address2': 'str',
+    # 'address2': 'str',
     'hot-water-env-eff': 'str',
     'posttown': 'str',
     'mainheatc-energy-eff': 'str',
@@ -98,7 +98,7 @@ vartypes = {
     # 'lodgement-date',
     'extension-count': "Int64",
     'mainheatc-env-eff': 'str',
-    'lmk-key': 'str',
+    # 'lmk-key': 'str',
     'wind-turbine-count': "Int64",
     'tenure': 'str',
     'floor-level': 'str',
@@ -147,6 +147,7 @@ class SearchEpc:
         uprn: [int, None] = None,
         size=None,
         property_type=None,
+        fast=False
     ):
         """
         Address lines 1 and postcode are mandatory fields. The other address lines are optional
@@ -187,6 +188,7 @@ class SearchEpc:
         self.size = size if size is not None else 25
 
         self.property_type = property_type
+        self.fast = fast
 
     @classmethod
     def get_house_number(cls, address: str) -> str | None:
@@ -365,9 +367,6 @@ class SearchEpc:
         # Finally, we identify the newest epc and the rest, and then return
         newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
 
-        # Retrieve postcode and address
-        address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
-
         # Ge the uprn from the newest record for this home
         uprns = {r["uprn"] for r in rows if r["uprn"]}
         # We can sometimes have no uprn for a property
@@ -384,6 +383,12 @@ class SearchEpc:
 
         uprn = uprns.pop() if uprns else None
 
+        if self.fast:
+            return newest_epc, [], {}, "", "", None
+
+        # Retrieve postcode and address
+        address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
+
         return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
 
     @staticmethod
@@ -575,6 +580,11 @@ class SearchEpc:
             property_type=property_type
         )
 
+        # If the lodgement datetime is missing, we fill it with the inspection-date
+        epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
+        # If we still have missing dates, we set them to the mean of the non-NA dates
+        epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
+
         # For each attribute, we need to determine the datatype and use an appropriate method
         # to estimate.
         estimated_epc = {}
@@ -609,7 +619,11 @@ class SearchEpc:
         # Insert an estimated lodgement datetime, with a weighted average
         estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
         # Extract logement date
-        estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
+        # It is possible that there is still no lodgement date, so we need to handle this
+        if pd.isnull(estimated_epc["lodgement-datetime"]):
+            estimated_epc["lodgement-date"] = None
+        else:
+            estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
 
         estimated_epc["postcode"] = self.postcode
         estimated_epc["uprn"] = self.uprn
diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index 906ff594..b594579f 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -145,6 +145,7 @@ class Eligibility:
                 "reason": None,
                 "thickness_classification": thickness_classification
             }
+            return
 
         # Insulation is already thick enough
         self.loft = {
@@ -164,8 +165,10 @@ class Eligibility:
         """
 
         is_cavity = self.walls["is_cavity_wall"]
-        is_empty = (not self.walls["is_filled_cavity"]) or (
+        is_empty = (not self.walls["is_filled_cavity"])
+        is_as_built = (
             self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"]
+            and self.walls["is_assumed"]
         )
         is_partial_filled = "partial" in self.walls["clean_description"].lower()
         # We look for potentially under performing cavities - anything that is assumed, as built and insulated
@@ -175,6 +178,7 @@ class Eligibility:
 
         is_unfilled_cavity = is_cavity and (is_empty and not is_partial_filled)
         is_partial_filled_cavity = is_cavity and is_partial_filled
+        is_assumed_filled_cavity = is_cavity and is_as_built
         is_underperforming_cavity = is_cavity and is_underperforming
 
         # Check if it has internal or external wall insulation
@@ -195,6 +199,13 @@ class Eligibility:
             }
             return
 
+        if is_assumed_filled_cavity:
+            self.cavity = {
+                "suitability": True,
+                "type": "as built assumed",
+            }
+            return
+
         if is_partial_filled_cavity:
             self.cavity = {
                 "suitability": True,
@@ -340,13 +351,35 @@ class Eligibility:
 
         # Check if the property is suitable for cavity wall
         self.cavity_insulation()
-        self.loft_insulation()
 
-        self.gbis_warmfront = (self.cavity["suitability"]) and (
-            int(self.epc["current-energy-efficiency"]) <= 68
-        )
+        current_sap = int(self.epc["current-energy-efficiency"])
+        # We have a strict suitability check and a non-strict check
 
-    def check_eco4_warmfront(self, post_retrofit_sap=None):
+        # Perfect strictness
+        if (self.cavity["type"] == "empty") and (current_sap < 69):
+            self.gbis_warmfront = {
+                "eligible": True,
+                "strict": True,
+                "message": "Perfect suitability",
+            }
+            return
+
+        # Near perfect
+        if self.cavity["suitability"] and (current_sap < 69):
+            self.gbis_warmfront = {
+                "eligible": True,
+                "strict": True,
+                "message": "Near perfect suitability",
+            }
+            return
+
+        self.gbis_warmfront = {
+            "eligible": False,
+            "strict": False,
+            "message": "All conditions fail",
+        }
+
+    def check_eco4_warmfront(self):
         """
         This funciton will check if the property is eligible for funding under the ECO4 scheme
 
@@ -378,49 +411,121 @@ class Eligibility:
         self.cavity_insulation()
         self.loft_insulation()
 
-        # make sure conditions 2 and 3 are true
-        is_eligible = self.cavity["suitability"] & self.loft["suitability"]
+        # We put in a placeholder when the roof is not a loft
+        if self.loft["reason"] == "roof not loft":
+            self.loft["thickness"] = 999
 
-        if current_sap >= 69:
+        # Case 1 - no conditions are met
+        if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55:
             self.eco4_warmfront = {
                 "eligible": False,
-                "message": "sap too high",
+                "strict": False,
+                "message": "All conditions fail",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        if post_retrofit_sap is None:
-
-            if current_sap >= 55:
-                message = "Possibly eligible but property currently EPC D"
-            else:
-                message = "subject to post retrofit sap" if is_eligible else "not eligible"
-
-            # Update the message to flag properties that failed just because of a full cavity.
-            # We need to double check that the wall is a cavity, that the loft is suitable and that the
-            # sap is within reason
-            # We can then estimate the age of the cavity fill
-            if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
-                message = "Failed due to full cavity - check cavity age"
-
+        # Case 2 - perfect match
+        if (self.cavity["type"] == "empty") and (self.loft["thickness"] <= 100) and (current_sap < 55):
             self.eco4_warmfront = {
-                "eligible": is_eligible,
-                "message": message,
+                "eligible": True,
+                "strict": True,
+                "message": "Perfect suitability",
                 "cavity_type": self.cavity["type"],
                 "loft_type": self.loft["thickness_classification"]
             }
             return
 
-        is_eligible = is_eligible & (post_retrofit_sap >= 69)
+        # Case 2.5 - near perfect match - but we would not recommend this using the model
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": True,
+                "message": "Near perfect suitability",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
 
-        self.eco4_warmfront = {
-            "eligible": is_eligible,
-            "message": None,
-            "cavity_type": self.cavity["type"],
-            "loft_type": self.loft["thickness_classification"]
-        }
-        return
+        # Case 3 - cavity is suitable, loft is within 150mm, sap is good
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets cavity, loft borderline, meets sap",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 3.5 - cavity is suitable, loft is over 150mm, sap is good
+        if self.cavity["suitability"] and (self.loft["thickness"] > 150) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets cavity and sap",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 4 - cavity is not suitable, loft and sap are - we say this is not eligible
+        if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": False,
+                "strict": False,
+                "message": "failed fabric check",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 5 - cavity and loft suitable, sap too high
+        if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap >= 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets fabric, fails SAP check",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 6 - meets just cavity
+        if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap >= 55):
+            self.eco4_warmfront = {
+                "eligible": True,
+                "strict": False,
+                "message": "Meets just cavity",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 7 - fails cavity and loft, but meets sap
+        if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
+            self.eco4_warmfront = {
+                "eligible": False,
+                "strict": False,
+                "message": "Fails cavity and loft, meets SAP",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        # Case 8 - fails cavity, meets loft, fails sap
+        if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
+            self.eco4_warmfront = {
+                "eligible": False,
+                "strict": False,
+                "message": "Fails cavity, meets loft, fails SAP",
+                "cavity_type": self.cavity["type"],
+                "loft_type": self.loft["thickness_classification"]
+            }
+            return
+
+        raise ValueError("Implement me")
 
     def check_gbis(self):
 
diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py
index a68bf272..378a0e83 100644
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@@ -387,17 +387,19 @@ def prepare_model_data_row(
     }
 
     simulations = [
-        [cavity_simulation],
-        [loft_simulation]
+        cavity_simulation,
+        loft_simulation
     ]
 
-    p.adjust_difference_record_with_recommendations(simulations)
+    recommendation_record = p.base_difference_record.df.to_dict("records")[0].copy()
+    scoring_dict = p.create_recommendation_scoring_data(
+        property_id=p.id,
+        recommendation_record=recommendation_record,
+        recommendations=simulations,
+        primary_recommendation_id=cavity_simulation["recommendation_id"]
+    )
 
-    # Make sure we definitely have the correct data
-    cavity_scoring = [x for x in p.recommendations_scoring_data if "cavity" in x["id"]][0]
-    loft_scoring = [x for x in p.recommendations_scoring_data if "loft" in x["id"]][0]
-
-    return [cavity_scoring, loft_scoring]
+    return [scoring_dict]
 
 
 def get_ha_32data(ha_data, cleaned, cleaning_data, created_at):
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 92956337..e414cd00 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1,11 +1,15 @@
 import os
+import re
 import openpyxl
+import Levenshtein
 from pathlib import Path
 import msgpack
 from datetime import datetime
 import pandas as pd
 import numpy as np
-from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3
+from utils.s3 import (
+    read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3, save_dataframe_to_s3_parquet
+)
 from utils.logger import setup_logger
 from dotenv import load_dotenv
 from tqdm import tqdm
@@ -15,6 +19,10 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
+from etl.epc.Record import EPCRecord
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc.DataProcessor import EPCDataProcessor
+from datetime import datetime
 
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@@ -23,6 +31,486 @@ DATA_FOLDER = Path(__file__).parent / "local_data" / "ha_data"
 logger = setup_logger()
 load_dotenv(ENV_FILE)
 
+PROPERTY_TYPE_LOOKUP = {
+    "HA1": {
+        "built_form": {
+            'Mid Terrace': 'Mid-Terrace',
+            'Semi-Detached': 'Semi-Detached',
+            'End Terrace': 'End-Terrace',
+            'Detached': 'Detached',
+            'Enclosed Mid': 'Mid-Terrace',
+            'Detached Local Connect': 'Detached',
+        }
+    },
+    "HA2": {
+        'HOUSE': 'House',
+        'FLAT': 'Flat',
+        'SHELTERED': None,
+        'BUNGALOW': 'Bungalow',
+        'BED-SIT': None,
+        'MAISONETTE': "Maisonette",
+        'HOSTEL': None
+    },
+    "HA5": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Bedsit": None
+    },
+    "HA6": {
+        "property_type": {
+            'HOUSE': "House",
+            'GROUND FLOOR FLAT': "Flat",
+            'UPPER FLOOR FLAT': "Flat",
+            'MAISONETTE': "Maisonette",
+            'BUNGALOW': "Bungalow",
+            'WARDEN BUNGALOW': "Bungalow",
+            'WARDEN FLAT': "Flat",
+            'EXTRACARE SCHEME': "Flat",
+        }
+    },
+    "HA7": {
+        "property_type": {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+        },
+        "built_form": {
+            "Semi Detached": "Semi-Detached",
+            "Mid Terrace": "Mid-Terrace",
+            "End Terrace": "End-Terrace",
+            "Detached": "Detached",
+            "End Terraced": "End-Terrace",
+        }
+    },
+    "HA8": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Bedsit": None,
+        "Room": None,
+        "Other": None,
+        "Commerical": None
+    },
+    "HA11": {
+        "Flat": "Flat",
+        "House": "House",
+        "Semi-Det House": "House",
+        "Bedsit": None,
+        "End-Terr House": "House",
+        "Mid-Terr House": "House",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "End Terr Flat": "Flat",
+        "Mid Terr Flat": "Flat",
+        "Detached Flat": "Flat",
+    },
+    "HA12": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Bedsit": None,
+    },
+    "HA13": {
+        'House': "House",
+        'Flat': "Flat",
+        'House MT': "House",
+        'House SD': "House",
+        'House ET': "House",
+        'Bungalow MT': "Bungalow",
+        'Bungalow ET': "Bungalow",
+        'ii': None,
+    },
+    "HA14": {
+        "property_type": {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+        }
+    },
+    "HA15": {
+        'House': 'House',
+        'Flat': 'Flat',
+        'Bungalow': 'Bungalow',
+        'Maisonette': 'Maisonette',
+        'Flat over garage': 'Flat',
+    },
+    "HA16": {
+        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
+        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
+        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
+        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
+        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Detached House': {"property-type": "House", "built-form": "Detached"},
+        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
+        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
+    },
+    "HA18": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Bedsit": None,
+        "Shop": None,
+        "Hostel": None,
+        "Block": None,
+    },
+    "HA20": {
+        "House": "House",
+        "Flat": "Flat",
+        'Sheltered Flat': "Flat",
+        'Maisonette': 'Maisonette',
+        'Bungalow': 'Bungalow',
+        'House. SD': 'House',
+        'House. MT': 'House',
+        'House. ET': 'House',
+        'Sheltered Bungalow': 'Bungalow',
+        'Guest Accomodation': None,
+        'Sheltered House': 'House',
+        'House. MT ': 'House',
+        'House. D': 'House'
+    },
+    "HA24": {
+        '01 HOUSE': 'House',
+        '02 FLAT': 'Flat',
+        '03 BUNGALOW': 'Bungalow',
+        '10 PBUNGALOW': 'Bungalow',
+        '01 HOUSE MID': 'House',
+        '13 SBUNGALOW': 'Bungalow',
+        '12 SBEDSIT': None,  # BEDSIT does not match the specified property types
+        '14 SFLAT': 'Flat',
+        '05 BEDSIT': None,
+        '04 MAISONETTE': 'Maisonette',
+        '11 PFLAT': 'Flat',
+        '09 PBEDSIT': None
+    },
+    "HA25": {
+        'Flat': 'Flat',
+        'Mid Terrace House': 'House',
+        'Semi Detached House': 'House',
+        'End Terrace House': 'House',
+        'House': 'House',
+        'Semi Detached Bung': 'Bungalow',
+        'Bungalow': 'Bungalow',
+        'End Terrace Bungalow': 'Bungalow',
+        'Maisonnette': 'Maisonette',
+        'Mid Terrace Bungalow': 'Bungalow',
+        'Bedspace': None,
+        'Detached House': 'House',
+        'Bedsit': 'Flat',
+        'Coach House': 'House',
+        'Detached Bungalow': 'Bungalow',
+        'Office Buildings': None,
+        'Guest Room': None,
+        'Mid Terrace Housekeeping ': 'House',
+        'End Terrace Housex': 'House'
+    },
+    "HA28": {
+        'Flat': 'Flat',
+        'Semi detached house': 'House',
+        'Terraced house': 'House',
+        'Maisonette flat': 'Maisonette',
+        'Sheltered bedsit': None,
+        'APD flat': 'Flat',
+        'Bungalow terraced': 'Bungalow',
+        'Flat with partition': 'Flat',
+        'Bungalow semi detached': 'Bungalow',
+        'APD Bungalow': 'Bungalow',
+        'Sheltered flat': 'Flat',
+        'Bedsit Flat': 'Flat',
+        'Bedsit bungalow semi detached': 'Bungalow',
+        'Sheltered bungalow terraced': 'Bungalow',
+        'Sheltered bedsit disabled': None,
+        'Bedsit bungalow terraced': 'Bungalow',
+        'Sheltered bungalow semi detached': 'Bungalow',
+        'Sheltered warden flat': 'Flat',
+        'Bungalow detached': 'Bungalow',
+        'Block': None,  # Does not match the specified property types
+        'End Terraced House': 'House',
+        'Mid Terraced House': 'House',
+        '#N/A': None,  # Assuming this is an invalid or missing entry
+        0: None  # Assuming 0 is also an invalid or missing entry
+    },
+    "HA30": {
+        'House': 'House',
+        'Flat': 'Flat',
+        'Bungalow': 'Bungalow',
+        'House with Attached Garage': 'House',
+        'Bed Space': None,  # Assuming this does not fit the specified property types
+        'House with Garage': 'House',
+        'Bungalow with Wheelchair Access': 'Bungalow',
+        'Maisonette': 'Maisonette',
+        'Flat with Wheelchair Access': 'Flat',
+        'Bedsit': None,  # Assuming this does not fit the specified property types
+        'Flat w Wheelchair Access & Car Park': 'Flat',
+        'House with Wheelchair Access': 'House',
+        'Bungalow w Wheelchair Access & Car ': 'Bungalow'
+    },
+    "HA32": {
+        'Bungalow': 'Bungalow',
+        'Flat': 'Flat',
+        'Bungalow Disabled': 'Bungalow',  # "Disabled" properties categorized with their base type
+        'House': 'House',
+        'Dormer Bungalow': 'Bungalow',
+        'Pop-In': None,  # Does not fit the specified property types
+        'Flat Disabled': 'Flat',
+        'Laundry': None,  # Does not fit the specified property types
+        'Bedsit': None,  # Excluded from the given categories
+        'Shed': None,  # Does not fit the specified property types
+        'Store Room': None  # Does not fit the specified property types
+    },
+    "HA34": {
+        'Flat': 'Flat',
+        'House': 'House',
+        'Bungalow': 'Bungalow',
+        'Maisonette': 'Maisonette',
+        'ND': None,
+    },
+    "HA35": {
+        "Flat": "Flat",
+        "Maisonette": "Maisonette",
+        "House": "House",
+        "Bedsit": None,
+        "2 Bedroom Unknown": None,
+        "1 Bedroom Unknown": None,
+        "3 Bedroom Unknown": None,
+        "4 Bedroom Unknown": None,
+    },
+    "HA37": {
+        "FLT": "Flat",
+        "HSE": "House",
+        "BNW": "Bungalow",
+        "MAS": "Maisonette",
+        "HSL": None
+    },
+    "HA39": {
+        "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
+        "1st floor flat": {"property_type": "Flat", "built_form": None},
+        "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"},
+        "Ground floor flat": {"property_type": "Flat", "built_form": None},
+        "End terrace house": {"property_type": "House", "built_form": "End-Terrace"},
+        "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"},
+        "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"},
+        "2nd floor flat": {"property_type": "Flat", "built_form": None},
+        "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"},
+        "3rd floor flat": {"property_type": "Flat", "built_form": None},
+        "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
+        "Maisonette": {"property_type": "Maisonette", "built_form": None},
+        "Detached house": {"property_type": "House", "built_form": "Detached"},
+        "Lower ground floor flat": {"property_type": "Flat", "built_form": None},
+        "Dormer bungalow": {"property_type": "Bungalow", "built_form": None},
+        "Basement flat": {"property_type": "Flat", "built_form": None},
+        "Cluster House": {"property_type": "House", "built_form": "Detached"},
+        "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None},
+        "Ground floor flat with study": {"property_type": "Flat", "built_form": None},
+        "4th floor flat": {"property_type": "Flat", "built_form": None},
+        "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
+        "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
+    },
+    "HA41": {
+        'Garage': None,
+        'House 1919-1945': 'House',
+        'House 1946-1964': 'House',
+        'Flats & Maisonettes post 1974': 'Flat',
+        'Non traditional houses': 'House',
+        'Sheltered': None,
+        'Flats & Maisonettes 1965-1974': 'Flat',
+        'House post 1974': 'House',
+        'Block': None,
+        'Flats & Maisonettes 1946-1964': 'Flat',
+        'House 1965-1974': 'House',
+        'Non traditional flats': 'Flat',
+        'Bungalow 1965-1974': 'Bungalow',
+        'PIMSS EMPTY': None,
+        'Bungalow post 1974': 'Bungalow',
+        'Bungalow 1946-1964': 'Bungalow',
+        'Flats & Maisonettes 1919-1945': 'Flat',
+        'House pre 1919': 'House',
+        'Flats & Maisonettes pre 1919': 'Flat',
+        'Bungalow 1919-1945': 'Bungalow',
+        'Office': None
+    },
+    "HA42": {
+        'Flat': 'Flat',
+        'House': 'House',
+        'Flat Basement': 'Flat',
+        'Room': None,
+        'Bedsit Flat': 'Flat',
+        'Maisonette': 'Maisonette',
+        'Scheme Office': None,
+        'Scheme Lounge': None,
+        'Bungalow': 'Bungalow',
+        'Garage': None,
+        'Scheme Sleep Room': None,
+        'Cluster': None,
+        'Scheme Room': None
+    },
+    "HA45": {
+        'Large block of flats': 'Flat',
+        'Small block of flats/dwelling converted in to flats': 'Flat',
+        'Semi-detached house': 'House',
+        'Mid-terraced house': 'House',
+        'End-terraced house': 'House',
+        'Block of flats': 'Flat',
+        'Detached house': 'House',
+        'Flat in mixed use building': 'Flat',
+    },
+    "HA48": {
+        "House": "House",
+        "Flat": "Flat",
+        "Bungalow": "Bungalow",
+        "Maisonette": "Maisonette",
+        "Unit": None
+    },
+    "HA50": {
+        'House': 'House',
+        'Bungalow': 'Bungalow',
+        'Flat': 'Flat',
+        'House SD': 'House',
+        'House MT': 'House',
+        'House ET': 'House',
+        'Bungalow ET': 'Bungalow',
+        'House SD ': 'House',
+        'House. SD': 'House',
+        'Bungalow SD': 'Bungalow',
+        'Bungalow MT': 'Bungalow',
+        'Bungalow D': 'Bungalow',
+        'House D': 'House',
+        'House. MT': 'House',
+        'House ': 'House',
+        'House ET ': 'House',
+        ' ': None,
+        'Flat?': 'Flat',
+        'Bungalow ': 'Bungalow'
+    },
+    "HA51": {
+        'FLAT': 'Flat',
+        'HOUSE': 'House',
+        'MAISONETTE': 'Maisonette',
+        'BEDSIT': None,  # Considering as a non-specific residential category here
+        'BUNGALOW': 'Bungalow',
+    },
+    "HA52": {
+        'House - Mid Terrace': 'House',
+        'Flat - First Floor': 'Flat',
+        'Flat - Ground Floor': 'Flat',
+        'House - Semi-Detached': 'House',
+        'House - End Terrace': 'House',
+        'Flat - Second Floor': 'Flat',
+        'Bedsit': None,  # Considering as a non-specific residential category here
+        'Bungalow - Semi-Detached': 'Bungalow',
+        'Bungalow - Mid Terrace': 'Bungalow',
+        'Bungalow - End Terrace': 'Bungalow',
+        'House - Detached': 'House',
+        'Flat - Third Floor': 'Flat',
+        'House attached to flats': 'House',
+        'Flat - Fourth Floor': 'Flat',
+        'Bungalow - Detached': 'Bungalow'
+    },
+    "HA56": {
+        'House Non Specific': 'House',
+        'HOUSE TERRACED': 'House',
+        'HOUSE - SEMI DETACHD': 'House',
+        'Bungalow': 'Bungalow',
+        'House - End Terraced': 'House',
+        'Block': None,
+        'Block with Communal': None,
+        'Bungalow - Terraced': 'Bungalow',
+        'Bungalow - Semi Dtch': 'Bungalow',
+        'Block House with rooms': None,
+        'Bungalow - End Terr': 'Bungalow',
+        'House - Mid Terraced': 'House',
+        'Bungalow - Detached': 'Bungalow',
+        'House - Detached': 'House',
+        'HOUSE THREE STOREY': 'House',
+        'Maisonette': 'Maisonette',
+        'Communal Block': None,
+        'Scheme': None
+    },
+    "HA63": {
+        'Flat': 'Flat',
+        'House - Semi detached': 'House',
+        'House - Detached': 'House',
+        'House - End Terrace': 'House',
+        'House - Mid Terrace': 'House',
+        'Bungalow - Semi detached': 'Bungalow',
+        'Bungalow': 'Bungalow',
+        'Bedsit': None,  # Considering as a non-specific residential category here
+        'Maisonette': 'Maisonette',
+        'Bungalow - End Terrace': 'Bungalow',
+        'Bungalow - Detached': 'Bungalow',
+        'Maisonette - Mid Terrace': 'Maisonette',
+        'Maisonette - End Terrace': 'Maisonette',
+        'Studio Flat': 'Flat',
+        'Maisonette - Detached': 'Maisonette',
+        'Bungalow - Mid Terrace': 'Bungalow',
+        'Bedsit - Mid Terrace': None,
+        'Bedsit - End Terrace': None,
+        'Amenity Block - Semi detached': None,  # Assuming non-residential
+        'Maisonette - Semi Detached': 'Maisonette',
+        'Amenity Block - Detached': None,  # Assuming non-residential
+        'Hostel': None,  # Typically not considered a standard residential property for this context
+        'Bungalow - Attached': 'Bungalow',
+        'Unknown': None,  # Not enough information to categorize
+        'Studio Flat - Mid Terrace': 'Flat',
+        'Chalet - Wheelchair': None  # Specialized type, not categorized here
+    },
+    "HA107": {
+        "property_type": {
+            "HOUSE": "House",
+            "BUNGALOW": "Bungalow",
+            "GRD FLOOR FLAT": "Flat",
+            "FIRST FLOOR FLAT": "Flat",
+            "SHELTERED BUNGALOW": "Bungalow",
+            "MAISONETTE": "Maisonette",
+            "SECOND FLOOR FLAT": "Flat",
+            "SHELTERED FIRST FLR": "Flat",
+            "SHELTERED GROUND FLR": "Flat",
+            "GRD FLOOR BED SIT": "House"
+        },
+        "built_form": {
+            "Semi Detached": "Semi-Detached",
+            "Mid Terrace": "Mid-Terrace",
+            "End Terrace": "End-Terrace",
+            "Detached": "Detached",
+            "Detatched": "Detached",
+        }
+    },
+    "HA117": {
+        "Flat": "Flat",
+        "House": "House",
+        "Bungalow": "Bungalow",
+        "Flat over garage/underpass": "Flat",
+    },
+    "HAXXX": {
+        'mid terraced house': 'House',
+        'semi detached house': 'House',
+        '1st fl 4 in a block': 'Flat',
+        'G/F 4 in a block': 'Flat',
+        'end terraced house': 'House',
+        '1st floor flat': 'Flat',
+        'G/F floor flat': 'Flat',
+        'semi detached bungalow': 'Bungalow',
+        '2nd floor flat': 'Flat',
+        'mid terrace bungalow': 'Bungalow',
+        'detached bungalow': 'Bungalow',
+        'end terrace bungalow': 'Bungalow',
+        'Staff accommodation': None  # Marked as None due to its special nature
+    }
+}
+
 
 class DataLoader:
     COLUMN_CONFIG = {
@@ -30,35 +518,256 @@ class DataLoader:
             "address": "Address",
             "postcode": "Address - Postcode"
         },
+        "HA5": {
+            "address": "Address",
+            "postcode": "matching_postcode"
+        },
         "HA6": {
             "address": "propertyaddress",
             "postcode": "address"  # The 'address' column actually contains postcode
+        },
+        "HA12": {
+            "address": "Full Address",
+            "postcode": "Postcode"
+        },
+        "HA16": {
+            "address": "Address",
+            "postcode": "Postcode"
+        },
+        "HA24": {
+            "address": "Address",
+            "postcode": "Postcode"
+        },
+        "HA25": {
+            "address": "T1_Address",
+            "postcode": "matching_postcode"
+        },
+        "HA30": {
+            "address": "A_Address",
+            "postcode": "A_Postcode"
+        },
+        "HA31": {
+            "address": "A_Address",
+            "postcode": "matching_postcode"
+        },
+        "HA45": {
+            "address": "Full postal address",
+            "postcode": "Postcode"
+        },
+        "HA48": {
+            "address": "Full Address",
+            "postcode": "Postcode"
+        },
+        "HA49": {
+            "address": "Property Address Full",
+            "postcode": "Property Postcode"
+        },
+        "HA52": {
+            "address": "Postal Address",
+            "postcode": "POSTCODE"
+        },
+        "HA54": {
+            "address": "Postal Address",
+            "postcode": "matching_postcode"
         }
     }
 
-    def __init__(self, directories, use_cache):
+    UNMATCHED_CIGA = {
+        "HA2": 0,
+        "HA6": 117,
+        "HA9": 0,
+        "HA12": 6,
+        "HA13": 119,
+        "HA14": 3,
+        "HA15": 3,
+        "HA16": 7,
+        "HA24": 12,
+        "HA50": 4,
+        "HA63": 15,
+        "HA107": 51,
+        "HA48": 0,
+        "HA45": 0,
+        "HA52": 5,
+        "HA20": 6
+    }
+
+    UNMATCHED_ECO3 = {
+        "HA25": 154,
+        "HA41": 26,
+        "HA50": 5,
+        "HA56": 320,
+        "HA63": 0,
+        "HA117": 4,
+        "HA51": 24
+    }
+
+    def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
         self.directories = directories
         self.use_cache = use_cache
+        self.december_figures_filepath = december_figures_filepath
+        self.rebuild = rebuild
 
         self.data = {}
+        self.december_figures = None
+        self.facts_and_figures = None
 
     def create_asset_list_matching_address(self, ha_name, asset_list):
 
-        if ha_name in ["HA1", "HA6"]:
+        if ha_name in [
+            "HA1", "HA5", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"
+        ]:
             asset_list["matching_address"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["address"]
-            ].str.lower().str.strip()
+            ].astype(str).str.lower().str.strip()
             asset_list["matching_postcode"] = asset_list[
                 self.COLUMN_CONFIG[ha_name]["postcode"]
-            ].str.lower().str.strip()
+            ].astype(str).str.lower().str.strip()
+        elif ha_name == "HA2":
+            # Create matching_address by concatenating Address Line 1, Address Line 2, Postcode
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA7":
+            # Create matching_address by concatenating Address, Address2, Address3, Postcode
+            asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA8":
+            asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA9":
+            asset_list["matching_address"] = asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA11":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA13":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA14":
             # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
-            asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address 2"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address 3"].str.lower().str.strip() + ", " + \
-                                             asset_list["Address 4"].str.lower().str.strip() + ", " + \
-                                             asset_list["Postcode"].str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA15":
+            asset_list["matching_address"] = (
+                asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA18":
+            asset_list["matching_address"] = (
+                asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Post Code"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA19":
+            asset_list["matching_address"] = (
+                asset_list["Address1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address2"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address3"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA20":
+            asset_list["matching_address"] = (
+                asset_list["House Name"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Block"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA21":
+            asset_list["matching_address"] = (
+                asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["PostCode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA25":
+            asset_list["matching_address"] = asset_list[
+                self.COLUMN_CONFIG[ha_name]["address"]
+            ].astype(str).str.lower().str.strip()
+
+            asset_list["matching_postcode"] = asset_list['matching_address'].apply(
+                lambda x: ' '.join(x.split()[-2:]) if pd.notnull(x) else x
+            )
+        elif ha_name == "HA27":
+            asset_list["matching_address"] = (
+                asset_list[" Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list[" Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA28":
+            asset_list["matching_address"] = (
+                asset_list["House Number"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Street 1"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA32":
+            asset_list["matching_address"] = (
+                asset_list["Dwelling num"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Street"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA33":
+            asset_list["matching_address"] = (
+                asset_list["ADDRESS"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["POST CODE"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA34":
+            asset_list["matching_address"] = (
+                asset_list[" Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list[" Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA35":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Address Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA37":
+            asset_list["matching_address"] = asset_list["ADDRESS LINE 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["ADDRESS LINE 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["ADDRESS LINE 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA38":
+            asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address_Line_2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address_Line_3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         elif ha_name == "HA39":
             # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
             asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -67,26 +776,146 @@ class DataLoader:
                                              asset_list["add_4"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
                                              asset_list["post_code"].astype(str).str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["post_code"].str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA41":
+            asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine4"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["AddressLine5"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA42":
+            asset_list["matching_address"] = asset_list["Dwelling Number"].astype(str).str.lower().str.strip() + " " + \
+                                             asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Locality"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA44":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postal Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postal Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA50":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA51":
+            asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_address"] = np.where(
+                asset_list["Block"].str.strip().str.len() > 0,
+                asset_list["Block"].astype(str).str.lower().str.strip() + ", " + \
+                asset_list["matching_address"],
+                asset_list["matching_address"]
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA56":
+            asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Post Code"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA63":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA70":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
         elif ha_name == "HA107":
             # Create matching_address by concatenating House No, Street, Town, District, Postcode
             asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
-                                             asset_list["Street"].str.lower().str.strip() + ", " + \
-                                             asset_list["Town"].str.lower().str.strip() + ", " + \
-                                             asset_list["District"].str.lower().str.strip() + ", " + \
-                                             asset_list["Postcode"].str.lower().str.strip()
-            asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+                                             asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["District"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Postcode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HA117":
+            asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["PostCode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HAXX":
+            asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
+                                             asset_list["PostCode"].astype(str).str.lower().str.strip()
+            asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+        elif ha_name == "HAXXX":
+            asset_list["matching_address"] = (
+                asset_list["Combined Address"].astype(str).str.lower().str.strip() + ", " +
+                asset_list["Postcode"].astype(str).str.lower().str.strip()
+            )
+            asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
         else:
             raise NotImplementedError("implement me")
 
         return asset_list
 
+    @staticmethod
+    def extract_property_info_ha107(properties):
+        """
+        Derive a property type and a built form from each HA107 "Property type" description by
+        case-sensitive substring matching.
+
+        :param properties: iterable of description values; entries that are not strings (e.g. NaN) are kept
+            in the output with no property type and no built form
+        :return: DataFrame with one row per description and the columns "Property type" (the input
+            description), "property_type" and "built_form"; the latter two are None when nothing matches
+        """
+        property_types = {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+            "Bedsit": None
+        }
+
+        # The first matching key wins and "Detached" is a substring of "Semi Detached", so the more
+        # specific key has to be tried before the generic one
+        built_forms = {
+            "Semi Detached": "Semi-Detached",
+            "End Terrace": "End-Terrace",
+            "Mid Terrace": "Mid-Terrace",
+            "Detached": "Detached"
+        }
+
+        def first_match(description, lookup):
+            # Value of the first key contained in the description, None when there is no such key
+            if not isinstance(description, str):
+                # Missing values cannot be substring-searched; treat them as unmatched
+                return None
+            return next((value for key, value in lookup.items() if key in description), None)
+
+        results = [
+            {
+                "Property type": description,
+                "property_type": first_match(description, property_types),
+                "built_form": first_match(description, built_forms)
+            }
+            for description in properties
+        ]
+
+        # Name the columns explicitly so that an empty input still yields the expected columns
+        results = pd.DataFrame(results, columns=["Property type", "property_type", "built_form"])
+
+        return results
+
     def append_asset_list_built_form(self, ha_name, asset_list):
 
         # Finally, we process property_type or built form, where needed
         if ha_name == "HA6":
             asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6)
 
+        if ha_name == "HA107":
+            mapped_df = self.extract_property_info_ha107(asset_list["Property type"].unique())
+            asset_list = asset_list.merge(
+                mapped_df, how="left", on="Property type"
+            )
+
         return asset_list
 
     @staticmethod
@@ -96,52 +925,237 @@ class DataLoader:
         :return:
         """
 
-        if ha_name in ["HA107"]:
+        if ha_name == "HA107":
             asset_list["HouseNo"] = asset_list["House No"].copy()
+        elif ha_name == "HA32":
+            asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
+        elif ha_name == "HA28":
+            asset_list["HouseNo"] = asset_list["House Number"].copy()
+        elif ha_name == "HA38":
+            asset_list["HouseNo"] = asset_list["House_Number"].copy()
+        elif ha_name == "HA9":
+            asset_list["HouseNo"] = asset_list["House Number"].copy()
+        elif ha_name == "HAXXX":
+            asset_list["HouseNo"] = asset_list["Door Number"].copy()
         else:
             split_addresses = asset_list['matching_address'].str.split(',', expand=True)
             house_numbers = split_addresses[0].str.split(' ', expand=True)
+            # If we have "flat" or "valley" as the house number, then the house number is actually in the second column
+            house_numbers[0] = np.where(house_numbers[0].isin(["flat", "valley"]), house_numbers[1], house_numbers[0])
+
             # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
             # many columns there might be
             house_numbers = house_numbers.iloc[:, 0:1]
             house_numbers.columns = ['HouseNo']
 
+            # Remove trailing punctuation such as , or ;
+            house_numbers["HouseNo"] = house_numbers["HouseNo"].str.rstrip(',;')
+
             asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1)
 
         return asset_list
 
     @staticmethod
-    def create_ciga_list_house_no(ha_name, ciga_list):
+    def create_ciga_list_house_no(ciga_list):
         """
         This function will append the House number onto the asset list
         :return:
         """
 
-        if ha_name in ["HA6"]:
-            split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
-            house_numbers = split_addresses[0].str.split(' ', expand=True)
-            # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
-            # many columns there might be
-            house_numbers = house_numbers.iloc[:, 0:1]
-            house_numbers.columns = ['HouseNo']
+        split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
+        house_numbers = split_addresses[0].str.split(' ', expand=True)
+        # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
+        # many columns there might be
+        house_numbers = house_numbers.iloc[:, 0:1]
+        house_numbers.columns = ['HouseNo']
 
-            ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
-        else:
-            raise NotImplementedError("Implement me")
+        ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
 
         return ciga_list
 
+    @staticmethod
+    def dedupe_ciga_list(ciga_list):
+        """Drop CIGA rows whose address + postcode key (spaces and punctuation stripped) repeats."""
+        raw_key = ciga_list["Matched Address"] + ciga_list["Matched Postcode"]
+        # Strip spaces, then punctuation. regex=True is required: pandas >= 2.0 treats the pattern
+        # as a literal string by default, which would leave the punctuation in the key.
+        ciga_list["unique_key"] = raw_key.str.replace(" ", "").str.replace(r'[^\w\s]', '', regex=True)
+        # Keep only the first row seen for each key
+        ciga_list = ciga_list[~ciga_list["unique_key"].duplicated()]
+        return ciga_list
+
+    @staticmethod
+    def get_asset_sheetname(workbook):
+        """
+        Choose the worksheet that holds the asset list, trying known names in priority order.
+
+        A sheet called "Asset" only counts when the workbook has no "Assets" sheet. "Assets" is
+        the fallback and is returned without checking that the sheet exists.
+        """
+        present = workbook.sheetnames
+        for candidate in ("Asset List", "Asset list", "Asset", "Decent Homes Stock", "Report"):
+            # "Asset" yields to "Assets" when both sheets are present
+            if candidate in present and not (candidate == "Asset" and "Assets" in present):
+                return candidate
+        return "Assets"
+
+    @staticmethod
+    def get_ciga_sheetname(workbook):
+        """
+        Choose the worksheet that holds the CIGA checks.
+
+        The known spellings are tried in priority order and the first one present in
+        workbook.sheetnames is returned. When none is present, "CIGA" is returned
+        without checking that such a sheet exists.
+        """
+        known_names = ("CIGA Checks", "CIGA checks", "CIGA check", "CIGA Check", "CIGA requested")
+        available = workbook.sheetnames
+        for candidate in known_names:
+            if candidate in available:
+                return candidate
+        return "CIGA"
+
+    @staticmethod
+    def get_survey_sheetname(workbook):
+        """
+        Choose the worksheet that holds the ECO surveys: the first known spelling present in
+        workbook.sheetnames, otherwise "ECO surveys" (returned without an existence check).
+        """
+        known_names = ("ECO Surveys", "ECO Survey", "ECO 4 Surveys completed", "ECO4 Surveys")
+        # next() stops at the first match, so earlier names take priority
+        return next(
+            (name for name in known_names if name in workbook.sheetnames),
+            "ECO surveys",
+        )
+
+    @staticmethod
+    def correct_ha51_asset_list(asset_list):
+        """
+        For addresses containing "61 wandle bank", take HouseNo from the lower-cased Block column.
+        """
+        at_wandle_bank = asset_list["matching_address"].str.contains("61 wandle bank")
+        block_lower = asset_list["Block"].str.lower()
+        asset_list["HouseNo"] = np.where(at_wandle_bank, block_lower, asset_list["HouseNo"])
+
+        return asset_list
+
+    def prepare_ha17(self, workbook):
+        """
+        Build the HA17 asset list from the "Blocks List - Cavity Wall only" and "Street Properties -
+        Cavity Wall" sheets. Addresses of the form "<n> to <m> ..." are expanded into one row per
+        number (rows marked "Not Eligible" are kept as-is), then row ids and house numbers are added.
+        """
+        blocks_sheet = workbook["Blocks List - Cavity Wall only"]
+        blocks_data = []
+        blocks_colnames = [cell.value for cell in blocks_sheet[2]]
+        for row in blocks_sheet.iter_rows(min_row=4, values_only=False):
+            row_data = [cell.value for cell in row]  # Plain values of each cell in the row
+            blocks_data.append(row_data)
+
+        blocks_df = pd.DataFrame(blocks_data, columns=blocks_colnames)
+
+        blocks_df["matching_address"] = (
+            blocks_df["Block Name\n[as per Naming Convention procedure]"].astype(str).str.lower().str.strip() + ", " +
+            blocks_df["Block Street Name"].astype(str).str.lower().str.strip() + ", " +
+            blocks_df["Postcode"].astype(str).str.lower().str.strip()
+        )
+        blocks_df["matching_postcode"] = blocks_df["Postcode"].astype(str).str.lower().str.strip()
+        blocks_df["property_type"] = "Flat"
+
+        street_properties_sheet = workbook["Street Properties - Cavity Wall"]
+        street_properties_data = []
+        street_properties_colnames = [cell.value for cell in street_properties_sheet[2]]
+        for row in street_properties_sheet.iter_rows(min_row=3, values_only=False):
+            row_data = [cell.value for cell in row]  # Plain values of each cell in the row
+            street_properties_data.append(row_data)
+
+        street_properties_df = pd.DataFrame(street_properties_data, columns=street_properties_colnames)
+
+        street_properties_df["matching_address"] = (
+            street_properties_df["Block Name\n[as per Naming Convention procedure]"].astype(
+                str).str.lower().str.strip() + ", " +
+            street_properties_df["Postcode"].astype(str).str.lower().str.strip()
+        )
+        street_properties_df["matching_postcode"] = street_properties_df["Postcode"].astype(str).str.lower().str.strip()
+        street_properties_df["property_type"] = street_properties_df[
+            "Block typology based on dwelling type\n[defined list]"
+        ]
+
+        asset_list_compressed = pd.concat(
+            [
+                blocks_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]],
+                street_properties_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]]
+            ],
+            axis=0
+        )
+        # Expand "<start> to <end> <rest>" addresses into one row per house number
+        range_pattern = r"(\d+)\s+to\s+(\d+)\s+(.*)"
+        asset_list = []
+        for _, row in tqdm(asset_list_compressed.iterrows(), total=len(asset_list_compressed)):
+            if row["ECO Eligibility"] == "Not Eligible":
+                asset_list.append(row.to_dict())
+                continue
+
+            # Detect a house number range
+            match = re.search(range_pattern, row["matching_address"])
+            if not match:
+                asset_list.append(row.to_dict())
+                continue
+
+            # Extracting the start and end of the range
+            start_number = int(match.group(1))
+            end_number = int(match.group(2))
+            rest_of_address = match.group(3)
+            # Generating the list of house numbers
+            # NOTE(review): when end_number < start_number this range is empty and the row is dropped - confirm
+            house_numbers = list(range(start_number, end_number + 1))
+            data_to_extend = []
+            for house_number in house_numbers:
+                new_adress = f"{house_number} {rest_of_address}"
+                entry = row.to_dict().copy()
+                entry.update({"matching_address": new_adress})
+                data_to_extend.append(entry)
+            asset_list.extend(data_to_extend)
+
+        asset_list = pd.DataFrame(asset_list)
+        # Add in asset_list_row_id
+        asset_list["asset_list_row_id"] = ["HA17" + str(i) for i in range(0, len(asset_list))]
+
+        # Add on house number
+        asset_list = self.create_asset_list_house_no(ha_name="HA17", asset_list=asset_list)
+
+        return asset_list
+
     def load_asset_list(self, filepath, ha_name):
         workbook = openpyxl.load_workbook(filepath)
-        asset_sheet = workbook["Assets"]
+        if ha_name == "HA17":
+            asset_list = self.prepare_ha17(workbook)
+            return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
+        else:
+            asset_sheetname = self.get_asset_sheetname(workbook)
+
+        asset_sheet = workbook[asset_sheetname]
         asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
+        if ha_name == "HA25":
+            asset_sheet_colnames[11] = "matching_postcode"
+
+        if ha_name == "HA31":
+            asset_sheet_colnames[2] = "matching_postcode"
+
+        if ha_name == "HA54":
+            asset_sheet_colnames[10] = "matching_postcode"
+
+        if ha_name == "HA5":
+            asset_sheet_colnames[2] = "matching_postcode"
 
         rows_data = []
+
         for row in asset_sheet.iter_rows(min_row=2, values_only=False):
             row_data = [cell.value for cell in row]  # This will get you the cell values
             rows_data.append(row_data)
 
         asset_list = pd.DataFrame(rows_data, columns=asset_sheet_colnames)
+
         asset_list = asset_list.loc[:, asset_list.columns.notnull()]
 
         # Remove entirely empty rows - consider all rows apart from row_color
@@ -157,39 +1171,89 @@ class DataLoader:
 
         asset_list = self.append_asset_list_built_form(ha_name=ha_name, asset_list=asset_list)
 
-        # We check if there is a survey list
-        survey_list = pd.DataFrame()
-        if "ECO Surveys" in workbook.sheetnames:
-            survey_sheet = workbook["ECO Surveys"]
-            survey_rows = []
-            for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
-                row_data = [cell.value for cell in row]  # This will get you the cell values
-                survey_rows.append(row_data)
+        # We correct the asset list if it needs it
+        # Correct the asset list
+        correction_function_name = f"correct_{ha_name.lower()}_asset_list"
+        if hasattr(self, correction_function_name):
+            asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
+            asset_list = asset_list_correction_function(asset_list)
 
-            survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+        # For HA1 and HA27, there is an exception in the structure of the data. We don't have any survey or ciga
+        # lists, and so we can return the asset list now (HA25 is handled separately further down, after the
+        # ECO3 sheet has been read)
+        if ha_name in ["HA1", "HA27"]:
+            return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
+
+        # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
+        # suitable under ECO4, since their walls will be filled
+        eco3_list = pd.DataFrame()
+        sheetnames_lower = [x.lower() for x in workbook.sheetnames]
+        eco3_sheetname_index = [i for i, x in enumerate(sheetnames_lower) if "eco3" in x.replace(" ", "")]
+        if eco3_sheetname_index:
+            eco3_sheetname = workbook.sheetnames[eco3_sheetname_index[0]]
+            eco3_sheet = workbook[eco3_sheetname]
+            eco3_rows = []
+            for row in eco3_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+                row_data = [cell.value for cell in row]  # This will get you the cell values
+                eco3_rows.append(row_data)
+
+            eco3_list = pd.DataFrame(eco3_rows, columns=[cell.value for cell in eco3_sheet[1]])
             # Remove columns that are None
-            survey_list = survey_list.loc[:, survey_list.columns.notnull()]
-            survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
-            # Perform survey list merge
+            eco3_list = eco3_list.loc[:, eco3_list.columns.notnull()]
+            # Remove rows that are completely empty
+            eco3_list = eco3_list.loc[eco3_list.loc[:, eco3_list.columns].notnull().any(axis=1)]
+            eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))]
+
+            # Perform the eco3 merge
+            if not eco3_list.empty:
+                eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
+
+        if ha_name in ["HA25"]:
+            # Accommodate HA25's unique structure: only the asset list and the ECO3 list are returned
+            return asset_list, pd.DataFrame(), pd.DataFrame(), eco3_list
+
+        # We check if there is a survey list
+        survey_sheetname = self.get_survey_sheetname(workbook)
+        survey_sheet = workbook[survey_sheetname]
+        survey_rows = []
+        for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            survey_rows.append(row_data)
+
+        survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+        # Remove columns that are None
+        survey_list = survey_list.loc[:, survey_list.columns.notnull()]
+        # Remove rows that are completely empty
+        survey_list = survey_list.loc[survey_list.loc[:, survey_list.columns].notnull().any(axis=1)]
+        survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
+
+        # Perform survey list merge
+        if not survey_list.empty:
             survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
 
         # We check if there are CIGA checks
-        ciga_list = pd.DataFrame()
-        if "CIGA Checks" in workbook.sheetnames:
-            ciga_sheet = workbook["CIGA Checks"]
-            ciga_rows = []
-            for row in ciga_sheet.iter_rows(min_row=2, values_only=False):
-                row_data = [cell.value for cell in row]  # This will get you the cell values
-                ciga_rows.append(row_data)
+        ciga_sheetname = self.get_ciga_sheetname(workbook)
+        ciga_sheet = workbook[ciga_sheetname]
+        ciga_rows = []
+        for row in ciga_sheet.iter_rows(min_row=2, values_only=False):
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            ciga_rows.append(row_data)
 
-            ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
-            # Remove columns that are None
-            ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
-            ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
-            # Perform ciga list merge
+        ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
+        # Remove columns that are None
+        ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+        # Remove rows that are completely None
+        ciga_list = ciga_list.loc[ciga_list.loc[:, ciga_list.columns].notnull().any(axis=1)]
+        # Perform ciga list merge
+        if not ciga_list.empty:
+            # Remove rows with missing postcode which happens in a small number of cases
+            ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
+            ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
+            ciga_list = self.create_ciga_list_house_no(ciga_list)
+            ciga_list = self.dedupe_ciga_list(ciga_list)
             ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
 
-        return asset_list, survey_list, ciga_list
+        return asset_list, survey_list, ciga_list, eco3_list
 
     @staticmethod
     def correct_ha6_asset_list(asset_list):
@@ -208,6 +1272,241 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def correct_ha56_asset_list(asset_list):
+        """
+        Mark specific HA56 postcodes (and one named block) as "Not eligible" in ECO Eligibility.
+        """
+        # CH1 4JR has already been surveyed, but it's listed in the asset list
+        # as a single row, when it's actually 32 units, so we just set this
+        # as ineligible
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "CH1 4JR",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+        # Same for CW8 3EU
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "CW8 3EU",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "CW1 3HP",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "WA4 2PH",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "BD6 1QJ",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "L39 1RS",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "WA10 2DE",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+        # Already surveyed under ECO4
+        asset_list["ECO Eligibility"] = np.where(
+            asset_list["Post Code"] == "SK17 6NR",
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+
+        asset_list["ECO Eligibility"] = np.where(
+            ((asset_list["Post Code"] == "WA5 0EN") &
+             (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")),
+            "Not eligible",
+            asset_list["ECO Eligibility"]
+        )
+        return asset_list
+
+    @staticmethod
+    def correct_ha14_asset_list(asset_list):
+        """
+        Fix the postcode recorded for 5 Queens Court.
+
+        Rows whose Address 1 is "5 Queens Court" and whose Postcode (surrounding whitespace
+        ignored) is "DE72 3NP" get matching_postcode "DE72 3QZ" and a rewritten matching_address.
+        """
+        queens_court = (
+            (asset_list["Address 1"] == "5 Queens Court") &
+            (asset_list["Postcode"].str.strip() == "DE72 3NP")
+        )
+        asset_list.loc[queens_court, "matching_postcode"] = "DE72 3QZ"
+        asset_list.loc[queens_court, "matching_address"] = (
+            "5 queens court, garfield avenue, draycott, derby, de72 3qz"
+        )
+
+        return asset_list
+
+    @staticmethod
+    def correct_ha15_asset_list(asset_list):
+        """Set matching_postcode to "hp19 9ny" on rows whose Address Line 1 is 103 Priory Crescent."""
+        is_priory_crescent = asset_list["Address Line 1"] == "103 Priory Crescent"
+        asset_list["matching_postcode"] = np.where(
+            is_priory_crescent, "hp19 9ny", asset_list["matching_postcode"]
+        )
+        return asset_list
+
+    @staticmethod
+    def correct_ha32_asset_list(asset_list):
+        """Rewrite Postcode to "hu4 6hg" for dwelling "7" on Norton Grove recorded as HU4 6HQ."""
+        on_norton_grove = asset_list["Street"] == "Norton Grove"
+        recorded_postcode = asset_list["Postcode"] == "HU4 6HQ"
+        dwelling_seven = asset_list["Dwelling num"] == "7"
+        needs_fix = on_norton_grove & recorded_postcode & dwelling_seven
+        asset_list["Postcode"] = np.where(needs_fix, "hu4 6hg", asset_list["Postcode"])
+        return asset_list
+
+    @staticmethod
+    def correct_ha38_asset_list(asset_list):
+        """
+        Apply HA38-specific corrections to HouseNo and matching_address ("/flat" addresses and
+        hand-written overrides for 10 South View). Adds a helper column, ExtractedHouseNo.
+        """
+        # For Kingsford court, the house number is at the end of the address
+        def rearrange_address_if_flat(address):
+            # Split case-insensitively: a case-sensitive split after a lower-cased test raised IndexError
+            parts = re.split('/flat', address, maxsplit=1, flags=re.IGNORECASE)
+            if len(parts) == 2:
+                return f"FLAT{parts[1]}, {parts[0]}"
+            return address
+
+        def extract_house_no_if_flat(address):
+            if '/flat' in address.lower():
+                # Attempt to extract the house number following "/flat" (any letter case)
+                try:
+                    house_no = re.split('/flat ', address, flags=re.IGNORECASE)[1].split(' ')[0]
+                    # Remove trailing comma
+                    house_no = house_no.replace(",", "")
+                except IndexError:
+                    house_no = None
+                return house_no
+            return None
+
+        asset_list['ExtractedHouseNo'] = asset_list['matching_address'].apply(extract_house_no_if_flat)
+        asset_list.loc[asset_list['ExtractedHouseNo'].notnull(), 'HouseNo'] = asset_list['ExtractedHouseNo']
+        asset_list['matching_address'] = asset_list['matching_address'].apply(rearrange_address_if_flat)
+
+        # We update a few specific rows
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM A1",
+                    "10 SOUTH VIEW/ROOM A2",
+                    "10 SOUTH VIEW/ROOM A3",
+                ]
+            )),
+            "10A",
+            asset_list["HouseNo"]
+        )
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM A1",
+                ]
+            )),
+            "10a, 10 south view/room a1, spennymoor, co. durham, dl16 7df",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM B1",
+                    "10 SOUTH VIEW/ROOM B2",
+                    "10 SOUTH VIEW/ROOM B3",
+                    "10 SOUTH VIEW/ROOM B4",
+                ]
+            )),
+            "10B",
+            asset_list["HouseNo"]
+        )
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/ROOM B1",
+                ]
+            )),
+            "10b, 10 south view/room b1, spennymoor, co. durham, dl16 7df",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT C",
+                ]
+            )),
+            "10C",
+            asset_list["HouseNo"]
+        )
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT C",
+                ]
+            )),
+            "FLAT c, spennymoor, co. durham, dl16 7df, 10c, 10 south view",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT D",
+                ]
+            )),
+            "10D",
+            asset_list["HouseNo"]
+        )
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT D",
+                ]
+            )),
+            "FLAT d, spennymoor, co. durham, dl16 7df, 10d, 10 south view",
+            asset_list["matching_address"]
+        )
+
+        asset_list["HouseNo"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT E",
+                ]
+            )),
+            "10E",
+            asset_list["HouseNo"]
+        )
+        asset_list["matching_address"] = np.where(
+            (asset_list["Address_Line_1"].isin(
+                [
+                    "10 SOUTH VIEW/FLAT E",
+                ]
+            )),
+            'FLAT e, spennymoor, co. durham, dl16 7df, 10e, 10 south view',
+            asset_list["matching_address"]
+        )
+
+        return asset_list
+
     @staticmethod
     def correct_ha6_survey_list(survey_list):
 
@@ -335,24 +1634,1167 @@ class DataLoader:
             "Post Code"
         ] = "ST5 7BY"
 
+        # PERFORM ADDITIONAL DROPS
+        # Dropping rows based on multiple conditions
+        conditions_to_drop = [
+            (survey_list['Street / Block Name'] == "Bedford Crescent") & (survey_list['Post Code'] == "ST5 3EH") & (
+                survey_list['NO.'] == 23) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "Hereford Avenue") & (survey_list['Post Code'] == "ST5 3EJ") & (
+                survey_list['NO.'] == 92) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "Seabridge Lane") & (survey_list['Post Code'] == "ST5 3EX") & (
+                survey_list['NO.'].isin([16, 18, 42])) & (
+                survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "ESKDALE PLACE") & (survey_list['Post Code'] == "ST5 3QW") & (
+                survey_list['NO.'] == 5) & (survey_list['SUBMISSION DATE'].astype(str) == "2023-03-06 00:00:00"),
+            (survey_list['Street / Block Name'] == "Birch House road") & (survey_list['Post Code'] == "ST6 2LS") & (
+                survey_list['NO.'].isin([56, 58])),
+            (survey_list['Street / Block Name'] == "Blackthorn Place") & (survey_list['Post Code'] == "ST6 2LS") & (
+                survey_list['NO.'].isin([37, 39])),
+            (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 7BT") & (
+                survey_list['NO.'].isin([17, 6])),
+            (survey_list['Street / Block Name'] == "Lion Grove") & (survey_list['Post Code'] == "ST5 7HQ") & (
+                survey_list['NO.'].isin([10, 12])) & (
+                survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "DENRY CRESCENT") & (survey_list['Post Code'] == "ST5 8JW") & (
+                survey_list['NO.'] == 87) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+            (survey_list['Street / Block Name'] == "HOLLINS CRESCENT") & (survey_list['Post Code'] == "ST7 1JW") & (
+                survey_list['NO.'] == 19)
+        ]
+
+        # Combine all conditions with an OR "|"
+        combined_condition = np.logical_or.reduce(conditions_to_drop)
+
+        # Drop rows that meet the combined condition
+        survey_list = survey_list[~combined_condition]
+
+        # Making replacements using np.where
+        survey_list['Post Code'] = np.where(
+            (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3EH") & (
+                survey_list['NO.'] == 17),
+            "ST5 7BT",
+            survey_list['Post Code']
+        )
+
+        survey_list['Post Code'] = np.where(
+            (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3ED") & (
+                survey_list['NO.'] == 6),
+            "ST5 7BT",
+            survey_list['Post Code']
+        )
+
+        # Maple avenue (stoke on trent, not newcastle) should be st7 1jw
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"].str.lower().str.contains("maple avenue")) & (
+                survey_list["Post Code"].str.lower() == "st7 1jx"
+            ),
+            "st7 1jw",
+            survey_list["Post Code"]
+        )
+
+        # Hollins Crescent should be st7 1jx
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"].str.lower().str.contains("hollins crescent")) & (
+                survey_list["Post Code"].str.lower() == "st7 1jw"
+            ),
+            "st7 1jx",
+            survey_list["Post Code"]
+        )
+
+        # Additional drops as the above misses some:
+        survey_list = survey_list[
+            ~((survey_list["NO."].astype(str).isin(["18", "42"])) &
+              (survey_list["Street / Block Name"] == "Seabridge Lane") &
+              (survey_list["Post Code"] == "ST5 3EY") &
+              (survey_list["SUBMISSION DATE"].astype(str) == "24.07.2023") &
+              (survey_list["INSTALLED OR CANCELLED"].str.contains("NO UPDATE YET")))
+        ]
+
         return survey_list
 
+    @staticmethod
+    def correct_ha14_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Godfrey Road", "Godfrey Drive"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Oiliver Road", "Oliver Road"
+        )
+
+        # For postcodes DE7 4FB and DE7 4EZ, the street is actually spelled WINDERMERE AVENUE, not WINDEREMERE AVENUE
+        # (i.e. without the extra "E" after "WINDER")
+        survey_list.loc[
+            (survey_list["Street / Block Name"] == "WINDEREMERE AVENUE") &
+            (survey_list["Post Code"].isin(["DE7 4FB", "DE7 4EZ"])),
+            "Street / Block Name"
+        ] = "WINDERMERE AVENUE"
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "MACDONALD SQAURE", "MACDONALD SQUARE"
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha15_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Mary Mac Manus Drive, Milton Keynes", "Mary Mac Manus Drive"
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha16_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "REEDS RD",
+            "Reeds ROAD",
+            survey_list["Street / Block Name"]
+        )
+        # Replace the standalone word "rd" with "road" (matched on word boundaries)
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road',
+                                                                                            regex=True)
+
+        # Replace " , " with ", "
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
+            " , ", ', ',
+        )
+        # Normalise spacing around commas: "{place} ,{place}" becomes "{place}, {place}"
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ',
+                                                                                            regex=True)
+        # Strip whitespace
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
+
+        # Correct errors
+        survey_list["Post Code"] = np.where(
+            survey_list["Post Code"] == "M38 0SA",
+            "M38 9SA",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
+            "M44 5JF",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eccels", "eccles")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("chatley, road",
+                                                                                            "chatley road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("vaughen", "Vaughan")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("cresent", "crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("plantation road",
+                                                                                            "plantation avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("how clough drive",
+                                                                                            "howclough drive")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brockhurst lane",
+                                                                                            "brookhurst lane")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("biirch road",
+                                                                                            "birch road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hadson road",
+                                                                                            "hodson road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("harbonne avennue",
+                                                                                            "narbonne avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "cumberland road, cadishead",
+            "cumberland avenue, cadishead")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("aston field drive",
+                                                                                            "ashton field drive")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wedgewood road",
+                                                                                            "wedgwood road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hamilton close",
+                                                                                            "hamilton avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "lichens crescent, fitton hill",
+            "lichens crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("south croft, fitton hill",
+                                                                                            "south croft")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(", fitton hill", "")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("firtree dr",
+                                                                                            "fir tree avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hawthorne road",
+                                                                                            "hawthorn crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("rein lee avenue",
+                                                                                            "reins lee avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("westerhill road",
+                                                                                            "wester hill road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("st martins road",
+                                                                                            "saint martins road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("timperley avenue",
+                                                                                            "timperley close")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eastwood road",
+                                                                                            "eastwood avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("new road", "new street")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grassmere road",
+                                                                                            "grasmere road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hulton road",
+                                                                                            "hulton avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("beechfield avenue",
+                                                                                            "beechfield road")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("princess avenue",
+                                                                                            "princes avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("edge ford crecent",
+                                                                                            "edge fold crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("conniston avenue",
+                                                                                            "coniston avenue")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("blackthorne crescent",
+                                                                                            "blackthorn crescent")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wellstock road",
+                                                                                            "wellstock lane")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brackley avenue",
+                                                                                            "brackley street")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brook avenue swinton",
+                                                                                            "brook avenue, swinton")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("green avenue swinton",
+                                                                                            "green avenue, swinton")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grasmere avenue wardley",
+                                                                                            "grasmere avenue, wardley")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("mardale avenue wardle",
+                                                                                            "mardale avenue, wardle")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("carleach grove",
+                                                                                            "cartleach Grove")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("arbour grove",
+                                                                                            "arbor Grove")
+
+        # Replacement for clively avenue 66-68
+        survey_list["NO."] = np.where(
+            survey_list["NO."] == "66-68",
+            "66",
+            survey_list["NO."]
+        )
+
+        # Delete some duplicated entries
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "york road") &
+              (survey_list["NO."].astype(str) == "12") &
+              (survey_list["Post Code"] == "M44 5HU") &
+              (survey_list["SUBMISSION DATE"].astype(str) == "45229"))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "peatfield avenue") &
+              (survey_list["NO."].astype(str) == "23") &
+              (survey_list["Post Code"] == "M27 9XG") &
+              (survey_list["SUBMISSION DATE"].astype(str) == "45236"))
+        ]
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha24_survey_list(survey_list):
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.strip()
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "council house, nidds lane", "nidds lane"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "wirral avenue", "wirrall avenue"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "st ives road", "st. ives crescent"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "sundringham road", "sandringham road"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "milton avenue", "milton road"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "st ives crescent", "st. ives crescent"
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "council house, waterbelly lane", "waterbelly lane"
+        )
+        # Generally remove "council house, " from the street name (every occurrence, not only a leading one)
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "council house, ", ""
+        )
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "st. leodegars close", "st leodegars close"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "montgomery crescent", "montgomery road"
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha28_survey_list(survey_list):
+        # Rename the "NO " column (note the trailing space) to "NO." to align with the other survey sheets
+        survey_list = survey_list.rename(columns={"NO ": "NO."})
+
+        survey_list["Post Code"] = np.where(
+            survey_list["Post Code"] == "ME75HA",
+            "ME7 5HA",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ANDREW MANOR/BRITTON ST", "ANDREW MANOR"
+        )
+
+        survey_list["Post Code"] = np.where(
+            survey_list["Post Code"] == "ME75TW",
+            "ME7 5TW",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST MARKS HOUSE/SAXON ST", "ST MARKS HOUSE"
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha38_survey_list(survey_list):
+        # Rename the "NO " column (note the trailing space) to "NO." to align with the other survey sheets
+        survey_list = survey_list.rename(columns={"NO ": "NO."})
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'Kingsford Court, Coombe Valley Road', 'Kingsford Court'
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'LESLIE TEW COURT/DERWENT ROAD', 'LESLIE TEW COURT'
+        )
+
+        # There is no 18A LESLIE TEW COURT in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "LESLIE TEW COURT") &
+              (survey_list["Post Code"] == "TN10 3TX") &
+              (survey_list["NO."] == "18A"))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'Brindley House, Wellbeck Road', 'Brindley House'
+        )
+
+        # Try taking just the first part of the string, splitting on a /
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split('/').str[0].str.strip()
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            'HUNTSMAN WAY', 'HUNTSMANS WAY'
+        )
+
+        # Try taking just the first part of the string, splitting on a ,
+        survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split(',').str[0].str.strip()
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "McCLAREN COURT", "MCLAREN COURT"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST JAMES CLOISTERS", "ST. JAMES'S CLOISTERS"
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            ((survey_list["NO."].isin(
+                [
+                    "FLAT 1 22",
+                    "FLAT 2 22",
+                    "FLAT 3 22",
+                    "FLAT 4 22",
+                    "FLAT 5 22",
+                    "FLAT 6 22",
+                ]
+            )) &
+             (survey_list["Street / Block Name"] == "MELTON ROAD")),
+            "22 MELTON ROAD",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            ((survey_list["NO."].isin(
+                [
+                    "FLAT 1 24",
+                    "FLAT 2 24",
+                    "FLAT 3 24",
+                    "FLAT 4 24",
+                    "FLAT 5 24",
+                    "FLAT 6 24",
+                ]
+            )) &
+             (survey_list["Street / Block Name"] == "MELTON ROAD")),
+            "24 MELTON ROAD",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "TURRETT GREEN COURT SILENT STREET", "TURRET GREEN COURT"
+        )
+
+        # Turret green court flat 1 doesn't exist in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "TURRET GREEN COURT") &
+              (survey_list["NO."] == 1))
+        ]
+        # 3, 45 Raywell Street doesn't exist in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "45 RAYWELL STREET") &
+              (survey_list["NO."] == 3))
+        ]
+
+        # 40 Avondale drive doesn't exist in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Avondale Drive") &
+              (survey_list["NO."] == 40))
+        ]
+        # 17A beech road has the wrong postcode
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == "BEECH ROAD") &
+            (survey_list["Post Code"] == "DH6 1JD"),
+            "DH6 1JB",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            (survey_list["Street / Block Name"] == "SOUTHVIEW") &
+            (survey_list["Post Code"] == "DL16 7DF"),
+            "SOUTH VIEW",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == "BEECH ROAD") &
+            (survey_list["Post Code"] == "DH6 1JD"),
+            "DH6 1JB",
+            survey_list["Post Code"]
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha32_survey_list(survey_list):
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Coxwold",
+            "Coxwold Grove",
+            survey_list["Street / Block Name"]
+        )
+
+        # Correct the misspelled "Barringhton Avenue" to "Barrington Avenue"
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Barringhton Avenue",
+            "Barrington Avenue",
+            survey_list["Street / Block Name"]
+        )
+
+        # Update how the Rustenburg addresses are listed in the identified addresses
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Rustenburg",
+            "Rustenburg Street",
+            survey_list["Street / Block Name"]
+        )
+
+        # Update how the MALIN LODGE, RONALDSWAY CLOSE addresses are listed in the identified addresses
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "MALIN LODGE, RONALDSWAY CLOSE",
+            "Malin Lodge",
+            survey_list["Street / Block Name"]
+        )
+
+        # Correct the misspelled "Feroes Close" to "Faroes Close"
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == "Feroes Close",
+            "Faroes Close",
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == 'FORESTER  WAY',
+            'FORESTER WAY',
+            survey_list["Street / Block Name"]
+        )
+
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == '6 Zeigfeld',
+            'Ziegfeld Court',
+            survey_list["Street / Block Name"]
+        )
+
+        # Malin Lodge, Ronaldsway Close
+        survey_list["Street / Block Name"] = np.where(
+            survey_list["Street / Block Name"] == 'Malin Lodge, Ronaldsway Close',
+            'Malin Lodge',
+            survey_list["Street / Block Name"]
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha50_survey_list(survey_list):
+
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == 'COSELEY STREET') &
+            (survey_list["Post Code"] == 'ST16 1LR'),
+            "ST6 1JU",
+            survey_list["Post Code"]
+        )
+
+        # Remove some COSELEY STREET entries, as surveys were done for properties outside of the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "COSELEY STREET") &
+              (survey_list["Post Code"] == "ST6 1JU") &
+              (survey_list["NO."].isin([96])))
+        ]
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace("ST33JZ", "ST3 3JZ")
+
+        # Remove some of Jesmond drive as we have surveys done outside of the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Jesmond Drive") &
+              (survey_list["Post Code"] == "ST3 3JZ") &
+              (survey_list["NO."].isin([29])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BRUNDELL OVAL", "BRUNDALL OVAL"
+        )
+
+        # Remove 4 Linden Place
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Linden Place") &
+              (survey_list["Post Code"] == "ST3 3AT") &
+              (survey_list["NO."].isin([4])))
+        ]
+
+        # Remove 11 Tilehurst Place
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Tilehurst Place") &
+              (survey_list["Post Code"] == "ST3 3AP") &
+              (survey_list["NO."].isin([11])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "deavile road", "DEAVILLE ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "WOOLISCROFT ROAD", "WOOLLISCROFT ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Leak Road", "Leek Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Springfield road", "Springfields road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "MILLWARD RD", "MILLWARD ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "REPINGTON RD", "REPINGTON ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ECCELSTONE PLACE", "ECCLESTONE PLACE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St. James Place", "St James Place"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CHELL HEATH RD", "CHELL HEATH ROAD"
+        )
+        # Correct postcode
+        survey_list["Post Code"] = np.where(
+            (survey_list["Street / Block Name"] == 'CHELL HEATH ROAD') &
+            (survey_list["Post Code"] == 'ST6 6HU'),
+            "ST6 6HJ",
+            survey_list["Post Code"]
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Franklin Rd", "Franklin Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Lodge Rd", "Lodge Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St Matthews Street", "St Matthew Street"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Grove Bank Road", "Grovebank Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "OVERSLEY RD", "OVERSLEY ROAD"
+        )
+
+        # Replace all of the " RD" with " ROAD"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            " RD", " ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St. Georges Crescent", "St Georges Crescent"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Tewson Road", "Tewson Green"
+        )
+
+        # Remove 55 Seabridge Lane
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Seabridge Lane") &
+              (survey_list["Post Code"] == "ST5 4AG") &
+              (survey_list["NO."].isin([55])))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Tyne Way") &
+              (survey_list["Post Code"] == "ST5 4AX") &
+              (survey_list["NO."].isin([56])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St.Bernards Place", "St Bernard Place"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Penarth Road", "Penarth Grove"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St. Marys Road", "St Marys Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Larch Drive", "Larch Grove"
+        )
+
+        # Drop 31 Lauder place north, as there is a duplicate. THis version also has a wrong postcode
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "LAUDER PLACE NORTH") &
+              (survey_list["Post Code"] == "ST20QS") &
+              (survey_list["NO."].isin([31])))
+        ]
+
+        # Handle dropping of dupes
+        survey_list["street_pruner"] = survey_list["Street / Block Name"].str.lower().str.replace(" ", "")
+        survey_list["postcode_pruner"] = survey_list["Post Code"].str.lower().str.replace(" ", "")
+
+        # Should go to 18
+        survey_list = survey_list.drop_duplicates(["NO.", "street_pruner", "postcode_pruner"])
+        survey_list = survey_list.drop(columns=["street_pruner", "postcode_pruner"])
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha107_survey_list(survey_list):
+        """Repair misspelt and mis-punctuated street names in the HA107 survey list (column reassigned on the input)."""
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Front Street, East Stockham", "Front Street, East Stockwith"
+        )
+
+        # Replace "HONEYHOLE L;ANE" with "HONEYHOLES LANE"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "HONEYHOLE L;ANE", "HONEYHOLES LANE"
+        )
+
+        # Replace "Croft Lane Cherry Willingham, Lincoln" with "Croft Lane, Cherry Willingham, Lincoln"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Croft Lane Cherry Willingham, Lincoln", "Croft Lane, Cherry Willingham, Lincoln"
+        )
+
+        # Replace "Snelland Road Wickenby, Lincoln" with "Snelland Road, Wickenby, Lincoln"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Snelland Road Wickenby, Lincoln", "Snelland Road, Wickenby, Lincoln"
+        )
+
+        # Replace Reasby Road Snelland, Lincoln with Reasby Road, Snelland, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Reasby Road Snelland, Lincoln", "Reasby Road, Snelland, Lincoln"
+        )
+
+        # Replace Silver Street Bardney, Lincoln with Silver Street, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Silver Street Bardney, Lincoln", "Silver Street, Bardney, Lincoln"
+        )
+
+        # Replace Manor Close Bardney, Lincoln with Manor Close, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Manor Close Bardney, Lincoln", "Manor Close, Bardney, Lincoln"
+        )
+
+        # Replace Ferry Road Southrey, Lincoln with Ferry Road, Southrey, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Ferry Road Southrey, Lincoln", "Ferry Road, Southrey, Lincoln"
+        )
+
+        # Replace Harvey Kent Gardens Bardney, Lincoln with Harvey Kent Gardens, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Harvey Kent Gardens Bardney, Lincoln", "Harvey Kent Gardens, Bardney, Lincoln"
+        )
+
+        # Replace Wragby Road Bardney, Lincoln with Wragby Road, Bardney, Lincoln
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Wragby Road Bardney, Lincoln", "Wragby Road, Bardney, Lincoln"
+        )
+
+        # Replace SPRINKHILL ROAD with SPINKHILL ROAD
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "SPRINKHILL ROAD", "SPINKHILL ROAD"
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha41_survey_list(survey_list):
+        return survey_list  # HA41 has no survey-list corrections; the input is returned as-is
+
+    @staticmethod
+    def correct_ha12_survey_list(survey_list):  # HA12: fix street-name typos and unspaced postcodes
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Henstone Road", "Hanstone Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Lindern avenue", "Linden Avenue"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "priness way", "Princess Way"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Worth Crecesent", "Worth Crescent"
+        )
+        # Insert the missing space into these two postcodes
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "DY117HA", "DY11 7HA"
+        )
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "DY117HF", "DY11 7HF"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Adderbrook Crescent", "Addenbrooke Crescent"
+        )
+        # NOTE(review): str.replace acts on substrings, so every street containing "Kinver Road" is renamed - confirm
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Kinver Road", "Kinver Avenue"
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha13_survey_list(survey_list):
+        """Fix HA13 street and postcode typos, drop one duplicate record, and return the corrected frame."""
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Woodfarm Road", "WOOD FARM ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ALLANDALE ROAD", "ALLANDALE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "NEWFIELDS LANE", "NEWFIELD LANE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BROADFIELDS ROAD", "BROADFIELD ROAD"
+        )
+        # "+" is a regex quantifier: force a literal match so the stray "+" is actually stripped
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "HP2 5SF+", "HP2 5SF", regex=False
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PESCOTT HILL", "PESCOT HILL"
+        )
+
+        # This is a duplicate record
+        survey_list = survey_list[
+            ~((survey_list["NO."] == 33) &
+              (survey_list["Street / Block Name"] == "Turners Hill") &
+              (survey_list["Post Code"] == "HP2 4LH") &
+              (survey_list["INSTALLED OR CANCELLED"] == "NO UPDATE - CHECKED 18.12.23"))
+        ]
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha18_survey_list(survey_list):
+        return survey_list  # HA18 has no survey-list corrections; the input is returned as-is
+
+    @staticmethod
+    def correct_ha35_survey_list(survey_list):
+        return survey_list  # HA35 has no survey-list corrections; the input is returned as-is
+
+    @staticmethod
+    def correct_ha34_survey_list(survey_list):  # HA34: respell streets, fix postcodes, drop unmatched/duplicate rows
+        # Drop every row with postcode L5 3SS (original note: "Note in the asset list" - presumably "Not"; confirm)
+        survey_list = survey_list[
+            survey_list["Post Code"] != "L5 3SS"
+            ]
+
+        survey_list["Post Code"] = survey_list["Post Code"].str.replace(
+            "L177DR", "L17 7DR"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PENVALLEY CRESENT", "Penvalley Crescent"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PENLINKEN DRIVE", "Penlinken Drive"
+        )
+
+        # There's no 32 Penlinken Drive in the asset sheet (matched on the corrected spelling set just above)
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Penlinken Drive") &
+              (survey_list["NO."] == 32))
+        ]
+
+        # There's no 30 Gwent Street in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "GWENT ST") &
+              (survey_list["NO."] == 30))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "POULTON RD", "Poulton Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST PAULS RD", "St Pauls Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BROAD LANE, KIRKBY", "BROAD LANE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BULLENS RD, KIRKBY", "Bullens Road"
+        )
+
+        # There's no 219 NORTH HILL ST in the asset sheet
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "NORTH HILL ST") &
+              (survey_list["NO."] == 219))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CROSLAND RD, KIRKBY", "CROSLAND ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PARK BROW DRIVE, KIRKBY", "Park Brow Drive"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CELTIC TREET", "Celtic Street"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BUCKLAND ROAD", "Buckland Street"
+        )
+
+        # Drop rows that repeat the same street, house number and postcode
+        survey_list = survey_list.drop_duplicates(["Street / Block Name", "NO.", "Post Code"])
+
+        # This is a duplicate with wrong postcode
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "CLARIBEL STREET") &
+              (survey_list["NO."] == 7) &
+              (survey_list["Post Code"] == "L8 8AF"))
+        ]
+        # Normalise house number "187 A" to "187A", for postcode L32 6QF only
+        survey_list["NO."] = np.where(
+            ((survey_list["NO."] == "187 A") &
+             (survey_list["Post Code"] == "L32 6QF")),
+            "187A",
+            survey_list["NO."]
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha56_survey_list(survey_list):  # HA56: drop unmatched rows and expand "RD" street names
+        # 22 and 24 Samual Street (WA5 1BB) are not in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Samual Street") &
+              (survey_list["NO."].isin([22, 24])) &
+              (survey_list["Post Code"] == "WA5 1BB"))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "STOURTON RD", "Stourton Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "BIRKIN RD", "Birkin Road"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "PORTLAND RD", "Portland Road"
+        )
+
+        # We remove a row, because two rows match to a block listing
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Tavlin Avenue") &
+              (survey_list["NO."] == 17) &
+              (survey_list["Post Code"] == "WA5 0EN"))
+        ]
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha30_survey_list(survey_list):
+        """Clean the HA30 survey list: drop rows lacking a postcode, trim/rename streets and drop listed rows."""
+        survey_list = survey_list[~pd.isnull(survey_list["Post Code"])]
+
+        # Keep only the part of the street name before the first "/"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.split("/").str[0]
+
+        # Not in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Horsebridge Road") &
+              (survey_list["NO."] == 286))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "DUTTON WAY") &
+              (survey_list["NO."] == 9))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "PAYTHORNE CLOSE") &
+              (survey_list["NO."] == 10))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "MARCHWOOD ROAD") &
+              (survey_list["NO."] == 11))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Otterburn Close") &
+              (survey_list["NO."] == 4))
+        ]
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Blossom Court") &
+              (survey_list["NO."] == 5))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "St LUKES CLOSE , HUNTINGDON", "St. Lukes Close"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "St. Lukes Close") &
+              (survey_list["NO."].isin([4, 7, 8])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ROMAN WAY , GODMANCHESTER , HUNTINGDON", "Roman Way"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Roman Way") &
+              (survey_list["NO."].isin([58])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "HEADLANDS , FENSTANTON , HUNTINGDON", "Headlands Fenstanton"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Headlands Fenstanton") &
+              (survey_list["NO."].isin([126, 134])))
+        ]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "WALLACE COURT , HUNTINGDON", "Wallace Court"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "CRICKETERS WAY , CHATTERIS", "Cricketers Way"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Jubilee Gardens", "Jubilee Green"
+        )
+
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "Harrow Road") &
+              (survey_list["NO."].isin([10])))
+        ]
+        # NOTE(review): runs after the St. Lukes Close drop above, so rows spelt "ST LUKES CLOSE" keep 4, 7, 8 - confirm
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ST LUKES CLOSE", "St. Lukes Close"
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha49_survey_list(survey_list):
+        return survey_list  # HA49 has no survey-list corrections; the input is returned as-is
+
+    @staticmethod
+    def correct_ha8_survey_list(survey_list):  # HA8: trim and respell street names, drop rows without a postcode
+        # Keep only the part of the street name before the first "/"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.split("/").str[0]
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "WESTONIA COURT HOUSE", "Westonia Court"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Hillesdon Avenue", "Hillesden Avenue"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Weston Street", "Western Street"
+        )
+
+        # Remove placeholder rows where postcode is missing
+        survey_list = survey_list[
+            ~pd.isnull(survey_list["Post Code"])
+        ]
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha11_survey_list(survey_list):  # HA11: a single row is removed, nothing is renamed
+        # Remove 39 HOLLYWOOD WAY as it's not in the asset list
+        survey_list = survey_list[
+            ~((survey_list["Street / Block Name"] == "HOLLYWOOD WAY") &
+              (survey_list["NO."] == 39))
+        ]
+        return survey_list
+
+    @staticmethod
+    def correct_ha42_survey_list(survey_list):
+        # The original asset list has nothing in the street for these, so strip the names out of the survey list
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Turnstone Terrace", ""
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Pegasus place", ""
+        )
+        return survey_list
+
+    @staticmethod
+    def correct_ha45_survey_list(survey_list):  # HA45: one street rename, applied to the input's street column
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Norwich Road", "Norwich Avenue"
+        )
+        return survey_list
+
+    @staticmethod
+    def correct_ha51_survey_list(survey_list):
+        survey_list = survey_list.rename(columns={"NO ": "NO."})  # house-number header "NO " becomes "NO."
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Autum Close", "Autumn Close"
+        )
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha52_survey_list(survey_list):  # HA52: respell three streets, then de-duplicate rows
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Mardalle Avenue", "Mardale Avenue"
+        )
+        # The source value below has a double space after "Ollerton"
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Ollerton  Close, Grappenhall", "Ollerton Close"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Bradshaw Road, Grappenhall", "Bradshaw Lane"
+        )
+
+        # Drop rows that repeat the same house number, street and postcode
+        survey_list = survey_list.drop_duplicates(["NO.", "Street / Block Name", "Post Code"])
+
+        return survey_list
+
+    @staticmethod
+    def correct_ha5_survey_list(survey_list):
+        return survey_list  # HA5 has no survey-list corrections; the input is returned as-is
+
+    @staticmethod
+    def correct_ha20_survey_list(survey_list):
+        # These street spellings are not in the asset list; rewrite each one to the form given below
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Abbot Close", "ABBOTS CLOSE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Downbarns Road", "DOWN BARNS ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "Austin Lane", "AUSTINS LANE"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "South Park Way", "SOUTHPARK WAY"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "OAKLAND ROAD", "OAKWOOD ROAD"
+        )
+
+        survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+            "ACRE WAY/NORTHWOOD", "ACRE WAY"
+        )
+
+        return survey_list
+
+    @staticmethod
+    def levenstein_match(matching_string, df):
+        """Return the one-row slice of df whose normalised matching_address is closest to matching_string."""
+        if df.empty:
+            return df  # no candidates: min() below would raise, so let the caller's row-count check report it
+        match_to = df["matching_address"].tolist()
+        # Strip out punctuation and spaces so candidates are compared in the same form as the key
+        match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
+        match_to = [x.replace(" ", "") for x in match_to]
+        # Perform matching between full key and match_to
+        distances = [Levenshtein.distance(matching_string, s) for s in match_to]
+        best_match_index = distances.index(min(distances))
+        # No distance threshold is applied for the moment: the closest candidate wins (first one on ties)
+        df = df.iloc[best_match_index:best_match_index + 1]
+        return df
+
     def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
 
-        # Correct the asset list
-        asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
-        asset_list = asset_list_correction_function(asset_list)
         # Correct the survey list
         survey_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_survey_list")
         survey_list = survey_list_correction_function(survey_list)
 
         missed_postcodes = []
-        if ha_name == "HA6":
+        if ha_name in ["HA6", "HA34"]:
             missed_postcodes = [
                 postcode.lower() for postcode in survey_list["Post Code"] if
                 postcode.lower() not in asset_list["matching_postcode"].values
             ]
 
+        if ha_name == "HA13":
+            missed_postcodes = ["hp17 8le"]
+
+        if ha_name == "HA56":
+            # Multiple properties are listed as blocks, which is a problem for matching
+            missed_postcodes = ["sk17 6nr", "wa5 0en"]
+
         matching_lookup = []
         for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
 
@@ -365,12 +2807,44 @@ class DataLoader:
                 asset_list["matching_address"].str.contains(row["Street / Block Name"].lower().strip())
             ].copy()
 
+            if not any(df["matching_address"].str.contains(str(house_number))):
+                if "flat" in str(house_number):
+                    house_number = house_number.split("flat")[1].strip()
+
+                # We check if we had an instance of flat x, y
+                if "," in str(house_number):
+                    house_number = house_number.split(",")[0].strip()
+
+                # We may also have a space for an instance of flat x y
+                if " " in str(house_number):
+                    house_number = house_number.split(" ")[0].strip()
+
             df = df[df["matching_address"].str.contains(str(house_number))]
+
+            if df.empty:
+
+                postcode_lower = row["Post Code"].lower()
+                if postcode_lower in missed_postcodes:
+                    matching_lookup.append(
+                        {
+                            "survey_list_row_id": row["survey_list_row_id"],
+                            "asset_list_row_id": None,
+                        }
+                    )
+                    continue
+
+                print(row["Street / Block Name"])
+                print(house_number)
+                print(row["Post Code"])
+                raise ValueError("Investigate")
+
             if df.shape[0] != 1:
-                df = df[df["HouseNo"] == str(house_number)]
+                df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
                 if df.shape[0] != 1:
-                    df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
-                    if df.shape[0] != 1:
+                    df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
+
+                    if df.empty:
+
                         postcode_lower = row["Post Code"].lower()
                         if postcode_lower in missed_postcodes:
                             matching_lookup.append(
@@ -381,10 +2855,23 @@ class DataLoader:
                             )
                             continue
 
-                        print(row["Street / Block Name"])
-                        print(house_number)
-                        print(row["Post Code"].lower())
-                        raise ValueError("Investigate")
+                    if df.shape[0] != 1:
+                        if "Town/Area" not in row.keys():
+                            full_key = (str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() +
+                                        row["Post Code"].lower().strip())
+                        else:
+                            full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + \
+                                       row["Town/Area"].lower().strip() + row["Post Code"].lower().strip()
+                        # Remove any spaces from the full key
+                        full_key = full_key.replace(" ", "")
+
+                        df = self.levenstein_match(full_key, df)
+
+                        if df.shape[0] != 1:
+                            print(row["Street / Block Name"])
+                            print(house_number)
+                            print(row["Post Code"])
+                            raise ValueError("Investigate")
 
             matching_lookup.append(
                 {
@@ -395,13 +2882,318 @@ class DataLoader:
 
         matching_lookup = pd.DataFrame(matching_lookup)
 
+        if matching_lookup.shape[0] != survey_list.shape[0]:
+            raise ValueError("Mismatch in the number of survey rows and matching lookup rows")
+
+        matching_lookup = matching_lookup[~pd.isnull(matching_lookup["asset_list_row_id"])]
+
+        if matching_lookup["asset_list_row_id"].duplicated().sum():
+            raise ValueError("Duplicated matches in survey list")
+
         # Merge onto the survey list
         survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")
 
         return survey_list
 
+    @staticmethod
+    def correct_ha25_eco3_list(eco3_list):  # HA25 ECO3: drop unusable rows, de-duplicate, fix streets and a postcode
+        # NEADS DRIVE, postcode with bs305dt, is not found in the asset list
+        eco3_list = eco3_list[
+            ~(eco3_list["Post Code"] == "BS305DT")
+        ]
+        # Drop rows with a missing postcode
+        eco3_list = eco3_list[
+            ~pd.isnull(eco3_list["Post Code"])
+        ]
+        # We have a bunch of genuine duplicates
+        eco3_list = eco3_list.drop_duplicates(["NO ", "Street / Block Name", "Post Code"])
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "HALWILL MEADOOW", "HALWILL MEADOW"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Hall Road", "Hall Rd"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "SPRINGFIELD WAY SAINT DAY", "SPRINGFIELD WAY ST DAY"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "BOND SPEAR COURT", "BOND-SPEAR COURT"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "ST.MARYS HILL", "ST MARYS HILL"
+        )
+        # Correct the postcode for EDMUND ROAD: TR14 8QJ becomes TR15 1BY
+        eco3_list["Post Code"] = np.where(
+            (eco3_list["Street / Block Name"] == "EDMUND ROAD") &
+            (eco3_list["Post Code"] == "TR14 8QJ"),
+            "TR15 1BY",
+            eco3_list["Post Code"]
+        )
+        return eco3_list
+
+    @staticmethod
+    def correct_ha50_eco3_list(eco3_list):
+        return eco3_list  # HA50 has no ECO3-list corrections; the input is returned as-is
+
+    @staticmethod
+    def correct_ha41_eco3_list(eco3_list):
+        return eco3_list  # HA41 has no ECO3-list corrections; the input is returned as-is
+
+    @staticmethod
+    def correct_ha63_eco3_list(eco3_list):  # HA63 ECO3: drop unusable rows, fix street names and a house number
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+        # Some postcodes that aren't in the asset list
+        eco3_list = eco3_list[
+            ~eco3_list["Post Code"].isin(
+                ["NR32 15X", "NR30 2BT"]
+            )
+        ]
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "POUND COTTAGES - BLOOMSBERRY CLOSE", "POUND COTTAGES"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "FREDRICK ROAD", "Frederick Road"
+        )
+        # NOTE(review): .str.replace assumes "NO " holds strings for DENMARK STREET rows - confirm against the sheet
+        # For denmark street, remove the space from the house number
+        eco3_list["NO "] = np.where(
+            eco3_list["Street / Block Name"] == "DENMARK STREET",
+            eco3_list["NO "].str.replace(" ", ""),
+            eco3_list["NO "]
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "OLD HOSPITAL MEWS HOSPITAL WALK", "Old Hospital Mews"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Portland House, Portland Street", "Portland House"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "MIDDLE MARKET STREET", "Middle Market Road"
+        )
+
+        return eco3_list
+
+    @staticmethod
+    def correct_ha117_eco3_list(eco3_list):
+        # Delete rows where postcode is null - there are some placeholder rows where this happens
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+        # NOTE(review): substring replace - a value already reading "155 TARRING ROAD" would gain a second "155"
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "TARRING ROAD", "155 TARRING ROAD"
+        )
+
+        return eco3_list
+
+    @staticmethod
+    def correct_ha56_eco3_list(eco3_list):  # HA56 ECO3: drop null postcodes, fix names/postcode, drop one duplicate
+        eco3_list = eco3_list[~pd.isnull(eco3_list["Post Code"])]
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Mount Pleasant, Crewe", "Mount Pleasant"
+        )
+
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "Dutton Close", "Dutton Way"
+        )
+
+        eco3_list["Post Code"] = eco3_list["Post Code"].str.replace(
+            "Ls63nl", "LS6 3NL"
+        )
+
+        # Handle a duplicate: drop the "CANCELLED 20.5.2022" record for 5 Mount Pleasant, CW1 3JF
+        eco3_list = eco3_list[
+            ~((eco3_list["Street / Block Name"] == "Mount Pleasant") &
+              (eco3_list["Post Code"] == "CW1 3JF") &
+              (eco3_list["NO "] == 5) &
+              (eco3_list["INSTALL/ CANCELLATION DATE"] == "CANCELLED 20.5.2022"))
+        ]
+
+        return eco3_list
+
+    @staticmethod
+    def correct_ha51_eco3_list(eco3_list):  # HA51 ECO3: fix street typos, one postcode, one house number; drop a row
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "HASELEMERE AVENUE", "HASLEMERE AVENUE"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "THORVILLE GROVE", "THORNVILLE GROVE"
+        )
+        eco3_list["Street / Block Name"] = eco3_list["Street / Block Name"].str.replace(
+            "MONTBRETA CLOSE", "MONTBRETIA CLOSE"
+        )
+        eco3_list["Post Code"] = np.where(
+            (eco3_list["Street / Block Name"] == "SYDENHAM ROAD") &
+            (eco3_list["Post Code"] == "CR0 2DW"),
+            "CR0 2ED",
+            eco3_list["Post Code"]
+        )
+        # Not in asset list
+        eco3_list = eco3_list[
+            ~((eco3_list["Street / Block Name"] == "WOODLEY LANE") &
+              (eco3_list["Post Code"] == "SM5 2RJ") &
+              (eco3_list["NO "] == "FLAT 3, 11"))
+        ]
+        # Normalise house number "47 B" to "47B" wherever it appears
+        eco3_list["NO "] = np.where(
+            (eco3_list["NO "] == "47 B"),
+            "47B",
+            eco3_list["NO "]
+        )
+
+        return eco3_list
+
+    def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
+
+        eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
+        eco3_list = eco3_list_correction_function(eco3_list)
+
+        asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
+        eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
+
+        if ha_name in ["HA25", "HA56", "HA51"]:
+            # HA25: 317 -> 259
+            missed_postcodes = {
+                postcode for postcode in eco3_list["postcode_no_space"] if
+                postcode not in asset_list["matching_postcode_nospace"].values
+            }
+
+            eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)]
+
+        # For the asset list, we create a matching address without any punctuation
+        # TODO: We should generally just remove puncutation from addresses when matching
+        asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(
+            r'[^\w\s]', '', regex=True
+        )
+        # Remove double spaces
+        asset_list["matching_address_no_punctuation"] = asset_list["matching_address_no_punctuation"].str.replace(
+            "  ", " "
+        )
+
+        matching_lookup = []
+        missed = []
+        for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
+            # if row["eco3_list_row_id"] == "HA51_Eco3_22":
+            #     raise Exception()
+            postcode = row["postcode_no_space"]
+
+            # df will never be empty, since we've already done a check for common postcodes
+            df = asset_list[
+                asset_list["matching_postcode_nospace"].str.contains(postcode)
+            ]
+
+            house_number = row["NO "]
+            if isinstance(house_number, str):
+                house_number = house_number.lower().strip()
+
+            if not any(df["HouseNo"].str.contains(str(house_number))):
+                if "flat" in str(house_number):
+                    house_number = house_number.split("flat")[1].strip()
+
+                # We check if we had an instance of flat x, y
+                if "," in str(house_number):
+                    house_number = house_number.split(",")[0].strip()
+
+                # We may also have a space for an instance of flat x y
+                if " " in str(house_number):
+                    house_number = house_number.split(" ")[0].strip()
+
+            # We must do the house number filter
+            df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
+
+            # Perform a search on streetname
+            # We do this to prevent duplicate matches to properties with the same postcode and house number,
+            # but different streets
+            street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0]
+            street_name_section1 = re.sub(r'[^\w\s]', '', street_name_section1)
+            df = df[df["matching_address_no_punctuation"].str.contains(street_name_section1)]
+
+            if df.empty:
+                missed.append(row["eco3_list_row_id"])
+                continue
+
+            if df.shape[0] > 1:
+                if "flat" in str(row["NO "]).lower():
+                    df = df[df["matching_address"].str.contains("flat")]
+                else:
+                    df = df[~df["matching_address"].str.contains("flat")]
+
+            if df.shape[0] != 1:
+                print(row["Street / Block Name"])
+                print(house_number)
+                print(row["Post Code"])
+                raise ValueError("Investigate")
+
+            matching_lookup.append(
+                {
+                    "eco3_list_row_id": row["eco3_list_row_id"],
+                    "asset_list_row_id": df["asset_list_row_id"].values[0],
+                }
+            )
+
+        # We verify that the number of missed (unmatched) entries is the number we expect for this HA
+        # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2
+        # where many surveys were conducted on house numbers, not in the asset list
+        # 154 missed, 2827 matched for HA 25
+        # For HA56, the number of missed is high at 320, however a big portion of these are due to the block being
+        # listed in the asset list, and individual units being in the survey list
+        if len(missed) != self.UNMATCHED_ECO3[ha_name]:
+            raise ValueError(
+                f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
+            )
+
+        matching_lookup = pd.DataFrame(matching_lookup)
+        # Check dupes as this will cause problems later on
+        if matching_lookup["asset_list_row_id"].duplicated().sum():
+            raise ValueError("Duplicated asset list row ids")
+
+        # Merge onto eco3 list
+        eco3_list = eco3_list.merge(matching_lookup, how="left", on="eco3_list_row_id")
+
+        asset_list.drop(columns=["matching_address_no_punctuation"], inplace=True)
+
+        return eco3_list
+
+    @staticmethod
+    def extract_streetname(address, house_number=None, postcode=None):
+        """
+        Lower-cases an address, removes the house number and postcode, and keeps the part before the first comma.
+
+        :param address: The full address as a string.
+        :param house_number: The house number to remove, as a string or integer.
+        :param postcode: The postcode to remove, as a string.
+        :return: The cleaned address.
+        """
+        # Convert everything to lower case
+        address = address.lower()
+
+        if house_number is not None:
+            # Remove the house number
+            address = re.sub(r'\b{}\b'.format(house_number), '', address, flags=re.IGNORECASE).strip()
+
+        if postcode is not None:
+            # Remove the postcode
+            address = re.sub(r'\b{}\b'.format(re.escape(postcode)), '', address, flags=re.IGNORECASE).strip()
+
+        # Get first section before a comma
+        address = address.split(",")[0]
+        # Additional cleaning to remove extra spaces and commas left over
+        address = re.sub(r'\s+', ' ', address)  # Replace multiple spaces with a single space
+        address = re.sub(r'\s*,\s*', ', ', address)  # Clean up space around commas
+
+        return address
+
     def merge_ciga_to_assets(self, asset_list, ciga_list, ha_name):
         matching_lookup = []
+        unmatched_addresses = []
+
         for _, row in tqdm(ciga_list.iterrows(), total=len(ciga_list)):
 
             house_number = row["HouseNo"]
@@ -413,23 +3205,43 @@ class DataLoader:
                 asset_list["matching_address"].str.contains(row["Matched Postcode"].lower().strip())
             ].copy()
 
-            df = df[df["HouseNo"] == str(house_number)]
-            # TODO: Might need to consider street name at some point
+            df = df[df["HouseNo"].astype(str) == str(house_number)]
+            # For ciga, we record the unmatched row and skip it
+            if df.empty:
+                unmatched_addresses.append(
+                    {
+                        "ciga_list_row_id": row["ciga_list_row_id"],
+                        "HouseNo": house_number,
+                        "Matched Postcode": row["Matched Postcode"]
+                    }
+                )
+                continue
+
             if df.shape[0] != 1:
 
-                if df.shape[0] != 1:
-                    df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
-                    if df.shape[0] != 1:
-                        postcode_lower = row["Post Code"].lower()
-                        if postcode_lower in missed_postcodes:
-                            matching_lookup.append(
-                                {
-                                    "survey_list_row_id": row["survey_list_row_id"],
-                                    "asset_list_row_id": None,
-                                }
-                            )
-                            continue
+                # We split house number and postcode out of the matched address for ciga
+                street_name = self.extract_streetname(
+                    address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"]
+                )
+                # We check if any of the rows contains the street name and if they do, filter
+                if any(df["matching_address"].str.replace(",", "").str.contains(street_name)):
+                    df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)]
 
+                if df.shape[0] != 1:
+                    # The final check we do here is to check for the presence of flat in the address
+                    if "flat" in row["Matched Address"].lower():
+                        df = df[df["matching_address"].str.contains("flat")]
+                    else:
+                        df = df[df["matching_address"].str.contains("flat") == False]
+
+                    if df.shape[0] != 1:
+                        full_key = str(row["HouseNo"]).lower().strip() + row["Matched Address"].lower().strip() + row[
+                            "Matched Postcode"].lower().strip()
+                        # Remove any spaces from the full key
+                        full_key = full_key.replace(" ", "")
+                        df = self.levenstein_match(full_key, df)
+
+                    if df.shape[0] != 1:
                         print(row["Street / Block Name"])
                         print(house_number)
                         print(row["Post Code"].lower())
@@ -437,13 +3249,27 @@ class DataLoader:
 
             matching_lookup.append(
                 {
-                    "survey_list_row_id": row["survey_list_row_id"],
+                    "ciga_list_row_id": row["ciga_list_row_id"],
                     "asset_list_row_id": df["asset_list_row_id"].values[0],
                 }
             )
 
+        # We have an acceptable number of ciga failures for each HA
+        if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]:
+            raise ValueError(
+                f"Unmatched addresses for {ha_name} is not as expected, got {len(unmatched_addresses)} unmatched")
+
         matching_lookup = pd.DataFrame(matching_lookup)
 
+        # Check dupes as this will cause problems later on
+        if matching_lookup["asset_list_row_id"].duplicated().any():
+            raise ValueError("Duplicated asset list row ids")
+
+        # Merge onto the ciga list
+        ciga_list = ciga_list.merge(matching_lookup, how='left', on="ciga_list_row_id")
+
+        return ciga_list
+
     @staticmethod
     def identify_built_form_ha6(property_string):
         """
@@ -474,40 +3300,37 @@ class DataLoader:
 
     def load(self):
 
-        if self.use_cache:
-            self.data = read_pickle_from_s3(
+        # Get the december figures, which is just a csv
+        self.december_figures = pd.read_csv(self.december_figures_filepath)
+        # Remove the spaces in HA Name
+        self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "")
+        for col in ["ECO4", "GBIS", "ECO4 remaining", "GBIS remaining"]:
+            self.december_figures[col] = self.december_figures[col].astype("Int64")
+
+        if self.use_cache and not self.rebuild:
+            data = read_pickle_from_s3(
                 bucket_name="retrofit-datalake-dev",
                 s3_file_name="ha-analysis/batch3-inputs.pickle",
             )
-            return
+        else:
+            data = {}
 
-        data = {}
         for filepath in self.directories:
             ha_name = filepath.split("/")[2]
+            if ha_name in data:
+                continue
             # Load asset list
-            logger.info("Loading asset list for {}".format(ha_name))
-            asset_list, survey_list, ciga_list = self.load_asset_list(
+            logger.info("Loading data for {}".format(ha_name))
+            asset_list, survey_list, ciga_list, eco3_list = self.load_asset_list(
                 filepath=filepath,
                 ha_name=ha_name,
             )
 
-            if file_config.get("survey_list"):
-                # TODO: Delete this
-                logger.info("Loading survey list for {}".format(ha_name))
-                survey_list, matched_lookup = self.load_survey_list(
-                    asset_list=asset_list,
-                    file_path=file_config["survey_list"]["filepath"],
-                    ha_name=ha_name,
-                    sheet_name=file_config["survey_list"]["sheetname"]
-                )
-            else:
-                survey_list = None
-                matched_lookup = None
-
             data[ha_name] = {
                 "asset_list": asset_list,
                 "survey_list": survey_list,
-                "matched_lookup": matched_lookup
+                "ciga_list": ciga_list,
+                "eco3_list": eco3_list
             }
 
         self.data = data
@@ -520,6 +3343,504 @@ class DataLoader:
             s3_file_name="ha-analysis/batch3-inputs.pickle",
         )
 
+    def ha_facts_and_figures(self):
+        """
+        Builds a DataFrame of facts and figures for each HA and stores it on self.facts_and_figures
+        :return: None
+        """
+
+        scheme_map = {
+            "ECO4": "ECO4",
+            "AFFORDABLE WARMTH": "ECO4",
+            "ECO4 A/W": "ECO4",
+            "ECO4 GBIS (ECO+)": "GBIS",
+            "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS",
+            "ECO4 AFFORDABLE WARMTH": "ECO4",
+            "Affordable Warmth": "ECO4",
+            "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS",
+            "ECO4 PPS": "ECO4",
+            "AFFORDABLE WARMTH / REMEDIAL": "ECO4",
+            "AFF0RDALE WARMTH": "ECO4",
+            "ECO 4 RdSAP CL": "ECO4",
+            "Affordable Warmth (R) ": "ECO4",
+            "Affordable Warmth ": "ECO4",
+            "ECO 4 AFFORDABLE WARMTH": "ECO4",
+        }
+
+        # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
+        # treat these as similar to subject to CIGA, and therefore as unconfirmed work that could fail. There
+        # is only a small volume of properties for which we see this
+        eco_eligibility_map = {
+            "not eligble": "not eligible",
+            "eco 4(subject to ciga)": "eco4 (subject to ciga)",
+            "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4 (subject to archetype check)": "eco4 (subject to archetype)",
+            "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4  (subject to ciga)": "eco4 (subject to ciga)",
+            "eco4(subject to ciga)": "eco4 (subject to ciga)",
+            "eco4 subject to ciga": "eco4 (subject to ciga)",
+            "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)",
+        }
+
+        ha_facts_and_figures = []
+        for ha_name, data_assets in self.data.items():
+            asset_list = data_assets["asset_list"].copy()
+            survey_list = data_assets["survey_list"].copy()
+            ciga_list = data_assets["ciga_list"].copy()
+            eco3_list = data_assets.get("eco3_list", pd.DataFrame())
+
+            asset_list_starting_size = asset_list.shape[0]
+
+            # Change the column name if it's ECO eligibility
+            asset_list = asset_list.rename(
+                columns={
+                    "ECO eligibility": "ECO Eligibility",
+                    "ECO Eligibilty": "ECO Eligibility",
+                },
+            )
+            # Remove surplus whitespace from the ECO Eligibility column
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.strip()
+            # Push to lower case
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.lower()
+            # Remap
+            asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].replace(eco_eligibility_map)
+
+            if not ciga_list.empty:
+                # We merge on ciga and update the status to reflect if it has failed ciga or not
+                # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
+                # check
+
+                ciga_list_to_merge = ciga_list[["asset_list_row_id", "Guarantee"]].copy()
+                ciga_list_to_merge = ciga_list_to_merge[~pd.isnull(ciga_list_to_merge["asset_list_row_id"])]
+
+                asset_list = asset_list.merge(ciga_list_to_merge, how='left', on="asset_list_row_id")
+
+                asset_list["ECO Eligibility"] = np.where(
+                    (
+                        asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) &
+                        (asset_list["Guarantee"] == "Yes")
+                    ),
+                    "failed ciga",
+                    asset_list["ECO Eligibility"]
+                )
+
+                # We replace any remaining "subject to ciga" with "eco4 - passed ciga" where Guarantee is "No"
+                asset_list["ECO Eligibility"] = np.where(
+                    (
+                        asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) &
+                        (asset_list["Guarantee"] == "No")
+                    ),
+                    "eco4 - passed ciga",
+                    asset_list["ECO Eligibility"]
+                )
+
+                asset_list = asset_list.drop(columns=["Guarantee"])
+
+            # Sanity check: the rename and ciga updates above must not have changed the number of rows
+            if asset_list.shape[0] != asset_list_starting_size:
+                raise ValueError("The asset list has changed in size")
+
+            # If we have eco3 surveys, we set a property to not eligible
+            if not eco3_list.empty:
+                eco3_list_to_merge = eco3_list[["asset_list_row_id"]].copy()
+                eco3_list_to_merge["has_eco3"] = True
+                asset_list = asset_list.merge(
+                    eco3_list_to_merge, how="left", on="asset_list_row_id"
+                )
+
+                if asset_list.shape[0] != asset_list_starting_size:
+                    raise ValueError("The asset list has changed in size, when merging on eco3")
+
+                # Any rows that have an eco3 survey are set to not eligible
+                asset_list["ECO Eligibility"] = np.where(
+                    asset_list["has_eco3"] == True,
+                    "not eligible",
+                    asset_list["ECO Eligibility"]
+                )
+                asset_list = asset_list.drop(columns=["has_eco3"])
+
+            # Report on sales
+            sales_report = {}
+            if not survey_list.empty:
+                scheme_column = survey_list.columns[0]
+                # Remap the values in the scheme column
+                survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+                # We clean up the survey list installation or cancelled
+                if "INSTALLED OR CANCELLED" in survey_list.columns:
+                    survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+                    # Remove all punctuation
+                    survey_list["installed_or_cancelled_clean"] = survey_list[
+                        "installed_or_cancelled_clean"].str.replace(
+                        r'[^\w\s]', '', regex=True
+                    )
+                    # Collapse any run of whitespace into a single space
+                    survey_list["installed_or_cancelled_clean"] = survey_list[
+                        "installed_or_cancelled_clean"].str.replace(
+                        r'\s+', ' ', regex=True
+                    )
+                    # Remove leading and trailing whitespace
+                    survey_list["installed_or_cancelled_clean"] = survey_list[
+                        "installed_or_cancelled_clean"].str.strip()
+
+                    survey_list["installation_status"] = None
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+                        "installed",
+                        survey_list["installation_status"]
+                    )
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+                        "cancelled",
+                        survey_list["installation_status"]
+                    )
+                    # Find partial installations
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+                        "in progress",
+                        survey_list["installation_status"]
+                    )
+                    # Find partial cancellations
+                    # TODO: We might have more indications of partial cancellations
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+                        "cancelled",
+                        survey_list["installation_status"]
+                    )
+                else:
+                    # We have some examples, e.g. HA28, where we do not have the installed or cancelled column
+                    if 'INSTALL/ CANCELLATION DATE' in survey_list.columns:
+                        survey_list["installation_status"] = np.where(
+                            survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                            "cancelled",
+                            "installed",
+                        )
+                    else:
+                        survey_list["installation_status"] = np.where(
+                            survey_list['INSTALL / CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                            "cancelled",
+                            "installed",
+                        )
+
+                # Finally, for other cases, we set the status to "in progress"
+                survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+
+                # We concatenate the scheme name with the installation status
+                survey_list["installation_status"] = (
+                    survey_list[scheme_column] + " - " + survey_list["installation_status"]
+                )
+
+                # We get the sales
+                sales_report = {
+                    "ECO4 - surveys sold": survey_list.shape[0],
+                    **survey_list["installation_status"].value_counts().to_dict()
+                }
+
+                # We find some cases where properties have sold but are missing CIGA checks
+                survey_list_to_merge = survey_list[["asset_list_row_id", "installation_status"]].copy()
+                survey_list_to_merge["has_a_survey_record"] = True
+                survey_list_to_merge = survey_list_to_merge[~pd.isnull(survey_list_to_merge["asset_list_row_id"])]
+
+                asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
+                # Update the cases where properties have sold, but are missing a CIGA check
+                # If we don't have a CIGA list, we set the value to ECO4
+                set_to = "eco4 - passed ciga" if not ciga_list.empty else "eco4"
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"].str.contains("subject to ciga")) & (
+                        asset_list["has_a_survey_record"] == True
+                    ),
+                    set_to,
+                    asset_list["ECO Eligibility"]
+                )
+                # Update the cases where a property has been marked as eligible for GBIS, but sold for ECO4
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "gbis") & (
+                        asset_list["installation_status"].isin(
+                            ["ECO4 - installed", "ECO4 - cancelled", "ECO4 - in progress"]
+                        )
+                    ),
+                    "eco4",
+                    asset_list["ECO Eligibility"]
+                )
+                # Update the cases where a property was marked as eligible for ECO4, but sold for GBIS
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"].isin(
+                        [
+                            "eco4",
+                            "eco4 (subject to ciga)",
+                            "eco4 - passed ciga",
+                            "failed ciga",
+                            "eco4 (subject to archetype)",
+                            "eco4 (subject to ciga) (subject to archetype)"
+                        ]
+                    )) & (
+                        asset_list["installation_status"].isin(
+                            ["GBIS - installed", "GBIS - cancelled", "GBIS - in progress"]
+                        )
+                    ),
+                    "gbis",
+                    asset_list["ECO Eligibility"]
+                )
+                # Update the cases where a property is marked as not eligible, but sold for GBIS
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "not eligible") & (
+                        asset_list["installation_status"].isin(
+                            ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
+                        )),
+                    "gbis",
+                    asset_list["ECO Eligibility"]
+                )
+
+                # Update the cases where a property is marked as not eligible, but sold for ECO4
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "not eligible") & (
+                        asset_list["installation_status"].isin(
+                            ["ECO4 - in progress", "ECO4 - installed", "ECO4 - cancelled"]
+                        )
+                    ),
+                    "eco4",
+                    asset_list["ECO Eligibility"]
+                )
+
+                asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"])
+
+                # Update the survey list with installation status
+                self.data[ha_name]["survey_list"] = survey_list
+
+            # Insert updated asset list
+            self.data[ha_name]["asset_list"] = asset_list
+
+            ha_facts_and_figures.append(
+                {
+                    "HA Name": ha_name,
+                    **asset_list["ECO Eligibility"].value_counts().to_dict(),
+                    **sales_report
+                }
+            )
+
+        ha_facts_and_figures = pd.DataFrame(ha_facts_and_figures)
+        ha_facts_and_figures = ha_facts_and_figures.drop(
+            columns=["not eligible"]
+        )
+
+        ha_facts_and_figures = ha_facts_and_figures.fillna(0)
+        # Make all columns apart from HA Name integers
+        for col in ha_facts_and_figures.columns[1:]:
+            ha_facts_and_figures[col] = ha_facts_and_figures[col].astype(int)
+
+        ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
+        ha_facts_and_figures = ha_facts_and_figures.fillna(0)
+
+        self.facts_and_figures = ha_facts_and_figures
+
+
+def get_property_type_and_built_form(property_meta, ha_name):
+    if ha_name in ["HA44"]:
+        return None, None
+
+    if ha_name == "HA1":
+        property_type = property_meta["Asset Type"]
+        # We correct a small error
+        if property_type == "a":
+            property_type = "House"
+
+        # Remap bedsits and rooms to flats
+        if property_type in ["Bedsit", "Room"]:
+            property_type = "Flat"
+
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None)
+    elif ha_name == "HA2":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type"].strip())
+        built_form = None
+    elif ha_name == "HA5":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
+    elif ha_name == "HA6":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
+        built_form = property_meta["built_form"]
+    elif ha_name == "HA7":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"])
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"])
+    elif ha_name == "HA8":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA9":
+        property_description = property_meta["Asset Type"].strip().lower()
+        if "house" in property_description:
+            return "House", None
+
+        if "flat" in property_description:
+            return "Flat", None
+
+        if "bungalow" in property_description:
+            return "Bungalow", None
+
+        if "maisonette" in property_description:
+            return "Maisonette", None
+
+        return None, None
+    elif ha_name == "HA11":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA12":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset_Type1"].strip())
+        built_form = None
+    elif ha_name == "HA13":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Type Cd"].strip())
+        built_form = None
+    elif ha_name == "HA14":
+        if property_meta["Asset Type Description"] == "Block - Repair":
+            # We deduce if it's a house or a flat: "room" in the address means house, otherwise flat
+            if "room" in property_meta["Address 1"].lower():
+                property_type = "House"
+            else:
+                property_type = "Flat"
+
+        else:
+            property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][
+                property_meta["Asset Type Description"]
+            ]
+
+        built_form = None
+    elif ha_name == "HA15":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA16":
+        config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
+        property_type = config.get("property-type")
+        built_form = config.get("built-form")
+    elif ha_name == "HA17":
+        return property_meta["property_type"], None
+    elif ha_name == "HA18":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
+    elif ha_name == "HA19":
+        property_type = property_meta["Dwelling Type"]
+        built_form = None
+    elif ha_name == "HA20":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
+    elif ha_name == "HA21":
+        property_description = property_meta["Property Type"].strip().lower()
+        if "house" in property_description:
+            return "House", None
+
+        if "flat" in property_description:
+            return "Flat", None
+
+        if "bungalow" in property_description:
+            return "Bungalow", None
+
+        if "maisonette" in property_description:
+            return "Maisonette", None
+
+        return None, None
+    elif ha_name == "HA24":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA25":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]]
+        built_form = None
+    elif ha_name == "HA27":
+        property_type = property_meta["Property Type"]
+        built_form = None
+    elif ha_name == "HA28":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Property Type - Academy"]]
+        built_form = None
+    elif ha_name == "HA30":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["A_AssetType"]]
+        built_form = None
+    elif ha_name == "HA31":
+        property_description = property_meta["A_AssetType"].strip().lower()
+        if "house" in property_description:
+            return "House", None
+
+        if "flat" in property_description:
+            return "Flat", None
+
+        if "bungalow" in property_description:
+            return "Bungalow", None
+
+        if "maisonette" in property_description:
+            return "Maisonette", None
+
+        return None, None
+
+    elif ha_name == "HA32":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling type"].strip())
+        built_form = None
+    elif ha_name == "HA34":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA35":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type Grouping"].strip())
+        built_form = None
+    elif ha_name == "HA37":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["PROPERTY TYPE"].strip())
+        built_form = None
+    elif ha_name == "HA39":
+        property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})
+        property_type = property_type_config.get("property_type", None)
+        built_form = property_type_config.get("built_form", None)
+
+        if property_type is None:
+            # We check for the presence of flat in the matching address
+            if "flat" in property_meta["matching_address"]:
+                property_type = "Flat"
+            else:
+                property_type = "House"
+    elif ha_name == "HA41":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Archetype"].strip())
+        built_form = None
+    elif ha_name == "HA42":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling use/type"].strip())
+        built_form = None
+    elif ha_name == "HA45":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property type"].strip())
+        built_form = None
+    elif ha_name == "HA48":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA49":
+        property_type = property_meta["Property Class"].strip()
+        built_form = None
+    elif ha_name == "HA50":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA51":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
+        built_form = None
+    elif ha_name == "HA52":
+        if property_meta["Property Type"] is None:
+            return None, None
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HA54":
+        property_type = property_meta["Property Type"]
+        built_form = None
+    elif ha_name == "HA56":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type Description"].strip())
+        built_form = None
+    elif ha_name == "HA63":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["PropertyType"].strip())
+        built_form = None
+    elif ha_name == "HA107":
+        property_type = property_meta.get("property_type", None)
+        built_form = property_meta.get("built_form", None)
+    elif ha_name == "HA117":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
+        built_form = None
+    elif ha_name == "HAXX":
+        return property_meta["Property Type"].split(":")[0].strip(), None
+    elif ha_name == "HAXXX":
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Unit Description"].strip())
+        built_form = None
+    else:
+        raise NotImplementedError("Implement me")
+
+    return property_type, built_form
+
 
 def get_epc_data(
     loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=True
@@ -527,84 +3848,6 @@ def get_epc_data(
     if not loader.data:
         raise ValueError("Data not found - please run loader.load() first")
 
-    property_type_lookup = {
-        "ha_1": {
-            "built_form": {
-                'Mid Terrace': 'Mid-Terrace',
-                'Semi-Detached': 'Semi-Detached',
-                'End Terrace': 'End-Terrace',
-                'Detached': 'Detached',
-                'Enclosed Mid': 'Mid-Terrace',
-                'Detached Local Connect': 'Detached',
-            }
-        },
-        "ha_6": {
-            "property_type": {
-                'HOUSE': "House",
-                'GROUND FLOOR FLAT': "Flat",
-                'UPPER FLOOR FLAT': "Flat",
-                'MAISONETTE': "Maisonette",
-                'BUNGALOW': "Bungalow",
-                'WARDEN BUNGALOW': "Bungalow",
-                'WARDEN FLAT': "Flat",
-                'EXTRACARE SCHEME': "Flat",
-            }
-        },
-        "ha_14": {
-            "property_type": {
-                "House": "House",
-                "Flat": "Flat",
-                "Bungalow": "Bungalow",
-                "Maisonette": "Maisonette",
-            }
-        },
-        "ha_39": {
-            "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
-            "1st floor flat": {"property_type": "Flat", "built_form": None},
-            "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"},
-            "Ground floor flat": {"property_type": "Flat", "built_form": None},
-            "End terrace house": {"property_type": "House", "built_form": "End-Terrace"},
-            "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"},
-            "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"},
-            "2nd floor flat": {"property_type": "Flat", "built_form": None},
-            "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"},
-            "3rd floor flat": {"property_type": "Flat", "built_form": None},
-            "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
-            "Maisonette": {"property_type": "Maisonette", "built_form": None},
-            "Detached house": {"property_type": "House", "built_form": "Detached"},
-            "Lower ground floor flat": {"property_type": "Flat", "built_form": None},
-            "Dormer bungalow": {"property_type": "Bungalow", "built_form": None},
-            "Basement flat": {"property_type": "Flat", "built_form": None},
-            "Cluster House": {"property_type": "House", "built_form": "Detached"},
-            "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None},
-            "Ground floor flat with study": {"property_type": "Flat", "built_form": None},
-            "4th floor flat": {"property_type": "Flat", "built_form": None},
-            "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
-            "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
-        },
-        "ha_107": {
-            "property_type": {
-                "HOUSE": "House",
-                "BUNGALOW": "Bungalow",
-                "GRD FLOOR FLAT": "Flat",
-                "FIRST FLOOR FLAT": "Flat",
-                "SHELTERED BUNGALOW": "Bungalow",
-                "MAISONETTE": "Maisonette",
-                "SECOND FLOOR FLAT": "Flat",
-                "SHELTERED FIRST FLR": "Flat",
-                "SHELTERED GROUND FLR": "Flat",
-                "GRD FLOOR BED SIT": "House"
-            },
-            "built_form": {
-                "Semi Detached": "Semi-Detached",
-                "Mid Terrace": "Mid-Terrace",
-                "End Terrace": "End-Terrace",
-                "Detached": "Detached",
-                "Detatched": "Detached",
-            }
-        }
-    }
-
     outputs = {}
     for ha_name, data_assets in loader.data.items():
 
@@ -633,82 +3876,21 @@ def get_epc_data(
         results = []
         scoring_data = []
         nodata = []
+        failed_model_rows = []
         for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
 
             if property_meta["matching_postcode"] is None:
                 continue
 
-            if ha_name == "ha_1":
-                property_type = property_meta["Asset Type"]
-                # We correct a small error
-                if property_type == "a":
-                    property_type = "House"
-
-                # Remap bedsits to flats
-                if property_type in ["Bedsit", "Room"]:
-                    property_type = "Flat"
-
-                built_form = property_type_lookup[ha_name]["built_form"].get(property_meta["Property Type"], None)
-            elif ha_name == "ha_6":
-                property_type = property_type_lookup[ha_name]["property_type"][property_meta["Dwelling type"]]
-                built_form = property_meta["built_form"]
-            elif ha_name == "ha_14":
-                if property_meta["Asset Type Description"] == "Block - Repair":
-                    # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
-                    if "room" in property_meta["Address 1"].lower():
-                        property_type = "House"
-                    else:
-                        property_type = "Flat"
-
-                else:
-                    property_type = property_type_lookup[ha_name]["property_type"][
-                        property_meta["Asset Type Description"]
-                    ]
-
-                built_form = None
-            elif ha_name == "ha_39":
-
-                property_type_config = property_type_lookup[ha_name].get(property_meta["ConstructionStyle"], {})
-                property_type = property_type_config.get("property_type", None)
-                built_form = property_type_config.get("built_form", None)
-
-                if property_type is None:
-                    # We check for the presence of room or flat
-                    if "flat" in property_meta["matching_address"]:
-                        property_type = "Flat"
-                    else:
-                        property_type = "House"
-            elif ha_name == "ha_107":
-
-                dwelling_style = property_meta["Dwelling Style"]
-                if isinstance(dwelling_style, str):
-                    dwelling_style = dwelling_style.strip()
-
-                property_type = property_type_lookup[ha_name]["property_type"].get(property_meta["DwellingType"])
-                built_form = property_type_lookup[ha_name]["built_form"].get(dwelling_style, None)
-
-                if property_type is None:
-                    if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
-                        property_type = "House"
-
-                    if "flat" in property_meta["Wall Construction"].lower():
-                        property_type = "Flat"
-
-                    if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
-                        # Hand a few specific cases
-                        property_type = "Bungalow"
-
-                    if property_meta["Street"] == "School View":
-                        property_type = "Bungalow"
-
-            else:
-                raise NotImplementedError("Implement me")
+            property_type, built_form = get_property_type_and_built_form(
+                property_meta=property_meta, ha_name=ha_name
+            )
 
             searcher = SearchEpc(
                 address1=str(property_meta["HouseNo"]),
                 postcode=property_meta["matching_postcode"],
                 auth_token=EPC_AUTH_TOKEN,
-                os_api_key=None,
+                os_api_key="",
                 full_address=property_meta["matching_address"]
             )
             searcher.ordnance_survey_client.property_type = property_type
@@ -739,9 +3921,48 @@ def get_epc_data(
             eligibility.check_gbis_warmfront()
             eligibility.check_eco4_warmfront()
 
-            if (not eligibility.eco4_warmfront["eligible"]) and (
-                not eligibility.gbis_warmfront
-            ) and consider_penultimate_epc:
+            # We check the conditions for checking the penultimate epc
+            identified_for_gbis = property_meta["ECO Eligibility"] in ["gbis"]
+            identified_for_eco4 = property_meta["ECO Eligibility"] in ["eco4"]
+            subject_to_ciga = property_meta["ECO Eligibility"] in [
+                "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"
+            ]
+
+            # condition 1 - identified for gbis and not eligible
+            condition_1 = (identified_for_gbis and not eligibility.gbis_warmfront
+                           and not eligibility.eco4_warmfront["eligible"]
+                           ) & consider_penultimate_epc
+
+            # condition 2 - identified for eco4 and not eligible
+            condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[
+                "eligible"]) & consider_penultimate_epc
+
+            # condition 3 - successfully identified for gbis (eligible for gbis or eco4)
+            condition_3 = (
+                identified_for_gbis and (eligibility.gbis_warmfront or eligibility.eco4_warmfront["eligible"])
+            )
+
+            # Nothing identified
+            condition_4 = (
+                not identified_for_gbis
+                and not identified_for_eco4
+                and not eligibility.gbis_warmfront
+                and not subject_to_ciga
+                and not eligibility.eco4_warmfront["eligible"]
+            )
+
+            # Not identified but seemingly eligible for eco4 or gbis
+            condition_5 = (
+                not identified_for_gbis and not identified_for_eco4 and (
+                eligibility.eco4_warmfront["eligible"] or eligibility.gbis_warmfront
+            )
+            )
+
+            condition_6 = (
+                subject_to_ciga and not eligibility.eco4_warmfront["eligible"]
+            )
+
+            if condition_1 or condition_2:
                 # We check the penultimate epc
                 eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
                 eligibility.check_gbis_warmfront()
@@ -750,53 +3971,55 @@ def get_epc_data(
                 # We don't update just to make data cleaning easier
                 if penultimate_epc.get("estimated") is None:
                     older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+            elif condition_3 or condition_4 or condition_5 or condition_6:
+                pass
+            else:
+                NotImplementedError("Implement me")
 
             # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
             # Loft MUST be suitable
             cavity_age = None
             if (
-                eligibility.walls["is_cavity_wall"] and
-                eligibility.walls["is_filled_cavity"] and
-                eligibility.loft["suitability"] and
-                eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+                identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]
             ):
                 # We check the age of the cavity and if it's particularly old, we flag it
                 cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
 
-            # Full checks
-            eligibility.check_gbis()
-            eligibility.check_eco4()
-
             if eligibility.eco4_warmfront["eligible"]:
                 if eligibility.epc["uprn"] == "":
                     eligibility.epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
-
-                scoring_dictionary = prepare_model_data_row(
-                    property_id=property_meta["asset_list_row_id"],
-                    modelling_epc=eligibility.epc,
-                    cleaned=cleaned,
-                    cleaning_data=cleaning_data,
-                    created_at=created_at,
-                    old_data=older_epcs,
-                    full_sap_epc=full_sap_epc,
-                    photo_supply_lookup=photo_supply_lookup,
-                    floor_area_decile_thresholds=floor_area_decile_thresholds
-                )
-                scoring_data.extend(scoring_dictionary)
+                try:
+                    scoring_dictionary = prepare_model_data_row(
+                        property_id=property_meta["asset_list_row_id"],
+                        modelling_epc=eligibility.epc,
+                        cleaned=cleaned,
+                        cleaning_data=cleaning_data,
+                        created_at=created_at,
+                        old_data=older_epcs,
+                        full_sap_epc=full_sap_epc,
+                        photo_supply_lookup=photo_supply_lookup,
+                        floor_area_decile_thresholds=floor_area_decile_thresholds
+                    )
+                    scoring_data.extend(scoring_dictionary)
+                except Exception as e:
+                    # If we fail, we just keep a record of it
+                    failed_model_rows.append(
+                        property_meta["asset_list_row_id"]
+                    )
 
             results.append(
                 {
                     "row_id": property_meta["asset_list_row_id"],
                     "uprn": eligibility.epc["uprn"],
+                    "is_estimated": searcher.newest_epc.get("estimated") is not None,
                     "property_type": eligibility.epc["property-type"],
-                    "gbis_eligible": eligibility.gbis_warmfront,
                     "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                     "eco4_message": eligibility.eco4_warmfront["message"],
+                    "eco4_strict": eligibility.eco4_warmfront["strict"],
+                    "gbis_eligible": eligibility.gbis_warmfront["eligible"],
+                    "gbis_message": eligibility.gbis_warmfront["message"],
+                    "gbis_strict": eligibility.gbis_warmfront["strict"],
                     "sap": float(eligibility.epc["current-energy-efficiency"]),
-                    "gbis_eligible_future": eligibility.gbis["eligible"],
-                    "gbis_eligible_future_message": eligibility.gbis["message"],
-                    "eco4_eligible_future": eligibility.eco4["eligible"],
-                    "eco4_eligible_future_message": eligibility.eco4["message"],
                     # Property components
                     "roof": eligibility.roof["clean_description"],
                     "walls": eligibility.walls["clean_description"],
@@ -806,92 +4029,99 @@ def get_epc_data(
                     "date_epc": eligibility.epc["lodgement-date"],
                     "loft_thickness": eligibility.roof["insulation_thickness"],
                     "cavity_age": cavity_age,
-                    **eligibility.walls,
-                    **eligibility.roof,
-                    "is_estimated": searcher.newest_epc.get("estimated") is not None,
                     "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
                     "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
                 }
             )
 
-        scoring_df = pd.DataFrame(scoring_data)
-        scoring_df = scoring_df.drop(
-            columns=[
-                "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
-                "carbon_ending"
-            ]
-        )
-
-        model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
-
-        # scoring_df["is_community"].value_counts()
-        # scoring_df[scoring_df["is_community"] == "Unknown"]
-        # property_meta = asset_list[asset_list["asset_list_row_id"] == "ha_67238"].squeeze()
-
-        all_predictions = model_api.predict_all(
-            df=scoring_df,
-            bucket="retrofit-data-dev",
-            prediction_buckets={
-                "sap_change_predictions": "retrofit-sap-predictions-dev",
-                "heat_demand_predictions": "retrofit-heat-predictions-dev",
-                "carbon_change_predictions": "retrofit-carbon-predictions-dev"
-            }
-        )
-
         results_df = pd.DataFrame(results)
+        scoring_df = pd.DataFrame(scoring_data)
+        results_df["post_install_sap"] = None
+        results_df["eligibility_classification"] = None
 
-        predictions = all_predictions["sap_change_predictions"].copy()
-
-        predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
-            results_df[["row_id", "sap"]], how="left", on="row_id"
-        )
-        predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
-        predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
-
-        results_df = results_df.merge(
-            predictions[["sap_uplift", "row_id"]],
-            how="left",
-            on="row_id"
-        )
-        results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
-
-        eligibility_assessment = []
-        for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
-            # The upgrade requirements are dependent on the current SAP
-
-            # If the property is an F or G, it only needs to upgrade to an %
-            if row["sap"] <= 38:
-                if row["post_install_sap"] >= 57:
-                    eligibility_classification = "highest confidence"
-                elif row["post_install_sap"] >= 55:
-                    eligibility_classification = "high confidence"
-                elif row["post_install_sap"] >= 53:
-                    eligibility_classification = "medium confidence"
-                else:
-                    eligibility_classification = "unlikely"
-            else:
-
-                if row["post_install_sap"] >= 71:
-                    eligibility_classification = "highest confidence"
-                elif row["post_install_sap"] >= 69:
-                    eligibility_classification = "high confidence"
-                elif row["post_install_sap"] >= 67:
-                    eligibility_classification = "medium confidence"
-                else:
-                    eligibility_classification = "unlikely"
-
-            eligibility_assessment.append(
-                {
-                    "row_id": row["row_id"],
-                    "eligibility_classification": eligibility_classification
-                }
+        if not scoring_df.empty:
+            scoring_df = scoring_df.drop(
+                columns=[
+                    "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                    "carbon_ending"
+                ]
             )
 
-        eligibility_assessment = pd.DataFrame(eligibility_assessment)
+            model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
+            model_api.MODEL_PREFIXES = ["sap_change_predictions"]
 
-        results_df = results_df.merge(
-            eligibility_assessment, how="left", on="row_id"
-        )
+            scoring_df["id"] = scoring_df["id"] + "phase=0"
+            # We split up the scoring_df and score
+            predictions = []
+            to_loop_over = range(0, scoring_df.shape[0], 400)
+            for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+                predictions_dict = model_api.predict_all(
+                    df=scoring_df.iloc[chunk:chunk + 400],
+                    bucket="retrofit-data-dev",
+                    prediction_buckets={
+                        "sap_change_predictions": "retrofit-sap-predictions-dev",
+                    }
+                )
+
+                predictions.append(predictions_dict["sap_change_predictions"])
+
+            predictions = pd.concat(predictions)
+            predictions_size = predictions.shape[0]
+
+            predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+                results_df[["row_id", "sap"]], how="left", on="row_id"
+            )
+            if predictions.shape[0] != predictions_size:
+                raise ValueError("Predictions size has changed")
+            predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+
+            results_df = results_df.merge(
+                predictions[["sap_uplift", "row_id"]],
+                how="left",
+                on="row_id"
+            )
+            results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+            eligibility_assessment = []
+            for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+                # The upgrade requirements are dependent on the current SAP
+
+                # If the property is an F or G (SAP <= 38), it only needs to upgrade to a D
+                if row["sap"] <= 38:
+                    if row["post_install_sap"] >= 57:
+                        eligibility_classification = "highest confidence"
+                    elif row["post_install_sap"] >= 55:
+                        eligibility_classification = "high confidence"
+                    elif row["post_install_sap"] >= 53:
+                        eligibility_classification = "medium confidence"
+                    else:
+                        eligibility_classification = "unlikely"
+                else:
+
+                    if row["post_install_sap"] >= 71:
+                        eligibility_classification = "highest confidence"
+                    elif row["post_install_sap"] >= 69:
+                        eligibility_classification = "high confidence"
+                    elif row["post_install_sap"] >= 67:
+                        eligibility_classification = "medium confidence"
+                    else:
+                        eligibility_classification = "unlikely"
+
+                eligibility_assessment.append(
+                    {
+                        "row_id": row["row_id"],
+                        "eligibility_classification": eligibility_classification
+                    }
+                )
+
+            eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+            # Make sure the results haven't changed in size
+            results_df = results_df.merge(
+                eligibility_assessment, how="left", on="row_id"
+            )
+            if results_df.shape[0] != len(results):
+                raise ValueError("results has changed size")
 
         # We store the results in S3 as a pickle
         save_pickle_to_s3(
@@ -914,430 +4144,769 @@ def get_epc_data(
 
 
 def get_col_widths(dataframe):
-    # First we find the maximum length of the index column
-    idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
-    # Then, we concatenate this to the max of the lengths of column name and its max value for each column, row-wise
-    return [idx_max] + [max(dataframe[col].astype(str).map(len).max(), len(col)) for col in dataframe.columns]
+    # Define a maximum width for any column to prevent excessively wide columns
+    max_allowed_width = 25
+
+    # Calculate widths for columns
+    widths = []
+
+    if isinstance(dataframe.columns, pd.MultiIndex):
+        # For MultiIndex, calculate max width considering the header and data
+        header_widths = [max(len(str(item)) for item in col) + 2 for col in dataframe.columns.values]  # +2 for padding
+        for i, column in enumerate(dataframe.columns):
+            max_data_width = max(dataframe[column].astype(str).apply(len).max(), header_widths[i])
+            widths.append(min(max_data_width, max_allowed_width))
+    else:
+        # For non-MultiIndex, calculate width normally
+        for col in dataframe.columns:
+            # Calculate the max length of data or column name and limit it
+            max_length = max(dataframe[col].astype(str).apply(len).max(), len(str(col)) + 2)  # +2 for padding
+            widths.append(min(max_length, max_allowed_width))
+
+    return widths
 
 
-def analyse_ha_data(outputs, loader):
-    """
-    The approach we take within this function is the following:
-    For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The
-    characterisation can be broken down as the following:
-    1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria
-    2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to
-    a CIGA check
-    3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft
-    insulation
-    4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under
-    any cirsumstances, given the available data
+# def analyse_ha_data(outputs, loader):
+#     """
+#     The approach we take within this function is the following:
+#     For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The
+#     characterisation can be broken down as the following:
+#     1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria
+#     2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to
+#     a CIGA check
+#     3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft
+#     insulation
+#     4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under
+#     any circumstances, given the available data
+#
+#     Then, for any property that has NOT been identified by Warmfront, we identify properties that look like they would
+#     qualify under the strictest criteria, and mark these as potential additional opportunities.
+#
+#     :return:
+#     """
+#
+#     eco4_rate = 1710
+#     gbis_rate = 600
+#     # old_eco4_rate = 1456
+#     old_gbis_rate = 432
+#
+#     epc_c_threshold = 80
+#     scheme_map = {
+#         "ECO4": "ECO4",
+#         "AFFORDABLE WARMTH": "ECO4",
+#         "ECO4 A/W": "ECO4",
+#         "ECO4 GBIS (ECO+)": "GBIS"
+#     }
+#
+#     ha_analysis_results = []
+#     total_revenue_results = []
+#     for ha_name, datasets in outputs.items():
+#         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+#
+#         results_df = datasets["results_df"].copy()
+#
+#         analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
+#             columns={"row_meaning": "asset_identification_status"}
+#         ).merge(
+#             results_df,
+#             how="left",
+#             right_on="row_id",
+#             left_on="asset_list_row_id"
+#         )
+#
+#         analysis_data["is_remaining"] = True
+#
+#         n_sold_eco4 = 0
+#         n_sold_gbis = 0
+#         if not inputs["survey_list"].empty:
+#             # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had
+#             # a survey)
+#             survey_list = inputs["survey_list"].copy()
+#
+#             # TODO: TEMP
+#             scheme_column = survey_list.columns[0]
+#             # We clean up the survey list installation or cancelled
+#             survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+#             # Remove all punctuation
+#             survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+#                 r'[^\w\s]', '', regex=True
+#             )
+#             # Remove double spaces
+#             survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+#                 r'\s+', ' ', regex=True
+#             )
+#             # Remove trailing spaces
+#             survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
+#
+#             # Remap the values in the scheme column
+#             survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+#
+#             survey_list["installation_status"] = None
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+#                 "installed",
+#                 survey_list["installation_status"]
+#             )
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+#                 "cancelled",
+#                 survey_list["installation_status"]
+#             )
+#             # Find partial installations
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+#                 "partially installed",
+#                 survey_list["installation_status"]
+#             )
+#             # Find partial cancellations
+#             # TODO: We might have more indications of partial cancellations
+#             survey_list["installation_status"] = np.where(
+#                 survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+#                 "partially cancelled",
+#                 survey_list["installation_status"]
+#             )
+#
+#             # Finally, for other cases, we set the status to "in progress"
+#             survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+#
+#             # We concatenate the scheme name with the installation status
+#             survey_list["installation_status"] = (
+#                 survey_list[scheme_column] + " - " + survey_list["installation_status"]
+#             )
+#
+#             # TODO: END TEMP
+#
+#             survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy()
+#             survey_list_to_merge["is_remaining"] = False
+#             analysis_data = analysis_data.drop(columns="is_remaining").merge(
+#                 survey_list_to_merge,
+#                 how="left", on="asset_list_row_id"
+#             )
+#             analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True)
+#
+#             n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0]
+#             n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0]
+#
+#         # Take just remaining
+#         analysis_data = analysis_data[analysis_data["is_remaining"]]
+#
+#         # Also, if the HA has started selling, we remove any that are still subject to ciga
+#         n_eco4_missed_subject_to_ciga = 0
+#         if not inputs["survey_list"].empty:
+#             n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum()
+#             analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"]
+#
+#         ################################################################################################
+#         # We take the properties that strictly qualified under eco
+#         ################################################################################################
+#
+#         eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
+#         eco4_identified["identification_type"] = None
+#         eco4_identified["identification_type"] = np.where(
+#             (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
+#             "strict",
+#             eco4_identified["identification_type"]
+#         )
+#
+#         # For expansive, the property can be no higher than an EPC C
+#         eco4_identified["identification_type"] = np.where(
+#             (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & (
+#                 eco4_identified["sap"] <= epc_c_threshold
+#             ),
+#             "expansive",
+#             eco4_identified["identification_type"]
+#         )
+#         ################################################################################################
+#         # We take the properties dependent on CIGA
+#         ################################################################################################
+#
+#         ciga_dependent_identified = analysis_data[
+#             analysis_data["ECO Eligibility"].isin(
+#                 [
+#                     "eco4 (subject to ciga)",
+#                     "eco4 - passed ciga"
+#                 ]
+#             )
+#         ].copy()
+#
+#         # These are properties that show filled cavity
+#         ciga_dependent_identified["identification_type"] = None
+#         ciga_dependent_identified["identification_type"] = np.where(
+#             ciga_dependent_identified["eco4_message"].isin(
+#                 [
+#                     "Perfect suitability",
+#                     "Meets cavity and sap",
+#                     "Fails cavity, meets loft, fails SAP",
+#                     "Meets fabric, fails SAP check",
+#                     "Meets cavity, loft borderline, meets sap",
+#                 ]
+#             ) & (ciga_dependent_identified["sap"] <= epc_c_threshold),
+#             "strict",
+#             ciga_dependent_identified["identification_type"]
+#         )
+#
+#         ciga_dependent_identified["identification_type"] = np.where(
+#             ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
+#                 ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
+#             )) & (
+#                 (ciga_dependent_identified["sap"] <= epc_c_threshold) &
+#                 pd.isnull(ciga_dependent_identified["identification_type"])
+#             ),
+#             "expansive",
+#             ciga_dependent_identified["identification_type"]
+#         )
+#
+#         ################################################################################################
+#         # We take the properties that qualified for gbis
+#         ################################################################################################
+#         gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
+#         gbis_identified["identification_type"] = None
+#         gbis_identified["identification_type"] = np.where(
+#             (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
+#             "strict",
+#             gbis_identified["identification_type"]
+#         )
+#
+#         gbis_identified["identification_type"] = np.where(
+#             (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & (
+#                 pd.isnull(gbis_identified["identification_type"])
+#             ),
+#             "expansive",
+#             gbis_identified["identification_type"]
+#         )
+#
+#         # Finally, we look at the properties that have not been identified by Warmfront
+#         not_identified = analysis_data[
+#             analysis_data["ECO Eligibility"].isin(
+#                 [
+#                     "not eligible"
+#                 ]
+#             )
+#         ].copy()
+#
+#         surplus_eco4 = not_identified[
+#             (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
+#                 ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
+#             ))
+#             ]
+#
+#         surplus_gbis = not_identified[
+#             (not_identified["gbis_eligible"] == True) & (
+#                 ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
+#             ) & (not_identified["sap"] < 69) & (
+#                 (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
+#                 not_identified["walls"].str.contains("partial", case=False, na=False)
+#             )
+#             )
+#             ]
+#         surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
+#
+#         # Output variables - the data was sent to us in December, but the remaining figures are
+#         # what was in November
+#         november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name]
+#
+#         # ECO4
+#         n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0]
+#         november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0)
+#         november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0]
+#         eco4_sales_since_november = n_sold_eco4 - november_eco4_sold
+#
+#         n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
+#         eco4_of_which_identified_strict = (
+#             eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
+#             ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
+#         )
+#         eco4_of_which_identified_expansive = (
+#             eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
+#             ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
+#         )
+#         # GBIS
+#         n_warmfront_identified_gbis = gbis_identified.shape[0]
+#         november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0)
+#         november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0]
+#         gbis_sales_since_november = n_sold_gbis - november_gbis_sold
+#         gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
+#         gbis_of_which_identified_expansive = \
+#             gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
+#
+#         to_append = {
+#             ("", "HA Name"): ha_name,
+#             ("", "# properties in asset list"): n_properties_remaining_in_asset_list,
+#             ############
+#             # ECO4
+#             ############
+#             ("ECO4", "# remaining November file"): november_eco4_remaining,
+#             ("ECO4", "# sold in November file"): november_eco4_sold,
+#             ("ECO4", "# sold (survey list)"): n_sold_eco4,
+#             ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga,
+#             ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4,
+#             ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
+#             ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
+#             ("ECO4", "Of which identified by model - total"): (
+#                 eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+#             ),
+#             ("ECO4", "Additional properties"): surplus_eco4.shape[0],
+#             ############
+#             # GBIS
+#             ############
+#             ("GBIS", "# remaining November file"): november_gbis_remaining,
+#             ("GBIS", "# sold in November file"): november_gbis_sold,
+#             ("GBIS", "# sold (survey list)"): n_sold_gbis,
+#             ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis,
+#             ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
+#             ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
+#             ("GBIS", "Of which identified by model - total"): (
+#                 gbis_of_which_identified_strict + gbis_of_which_identified_expansive
+#             ),
+#             ("GBIS", "Additional properties"): surplus_gbis.shape[0]
+#         }
+#
+#         ha_analysis_results.append(to_append)
+#
+#         # Calculate the revenue results
+#         to_append_revenue = {
+#             ("", "HA Name"): ha_name,
+#             # Eco4 revenue
+#             ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate,
+#             ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate,
+#             ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate,
+#             ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate,
+#             ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate,
+#             ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate,
+#             ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate,
+#             ("ECO4", "Of which identified by model - total"): eco4_rate * (
+#                 eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+#             ),
+#             ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0],
+#         }
+#         total_revenue_results.append(to_append_revenue)
+#
+#     ha_analysis_results = pd.DataFrame(ha_analysis_results)
+#     ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
+#
+#     facts_and_figures = loader.facts_and_figures.copy()
+#     facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
+#     facts_and_figures = facts_and_figures.sort_values("ha_number")
+#     facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
+#
+#     # Rename some of the cols
+#     facts_and_figures = facts_and_figures.rename(
+#         columns={
+#             # ECO4 cols
+#             "ECO4": "ECO4 - November",
+#             "GBIS": "GBIS - November",
+#             "eco4 (subject to ciga)": "ECO4 - subject to ciga",
+#             "eco4": "ECO4 - doesn't need CIGA",
+#             "eco4 - passed ciga": "ECO4 - passed CIGA",
+#             "failed ciga": "ECO4 - failed CIGA",
+#             "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
+#             "ECO4 - in progress": "ECO4 - Install in progress",
+#             "ECO4 - cancelled": "ECO4 - Install cancelled",
+#             # GBIS cols
+#             "gbis": "GBIS total (asset list)"
+#         }
+#     )
+#     # We calculate the eco4 total from the asset list
+#     # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
+#     # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
+#     # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
+#     # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
+#     facts_and_figures["ECO4 total (asset list - pre ciga)"] = (
+#         facts_and_figures["ECO4 - doesn't need CIGA"] +
+#         facts_and_figures["ECO4 - subject to ciga"] +
+#         facts_and_figures["ECO4 - passed CIGA"]
+#     )
+#
+#     facts_and_figures["ECO4 total (asset list - post ciga)"] = None
+#     facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where(
+#         facts_and_figures["ECO4 - passed CIGA"] > 0,
+#         facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
+#         facts_and_figures["ECO4 total (asset list - post ciga)"]
+#     )
+#
+#     # Re-arrange the columns
+#     facts_and_figures = facts_and_figures[
+#         [
+#             'HA Name',
+#             'ECO4 - November',
+#             'GBIS - November',
+#             'ECO4 total (asset list - pre ciga)',
+#             'ECO4 total (asset list - post ciga)',
+#             'GBIS total (asset list)',
+#             'ECO4 - subject to ciga',
+#             "ECO4 - doesn't need CIGA",
+#             'ECO4 - passed CIGA',
+#             'ECO4 - failed CIGA',
+#             'ECO4 - installed',
+#             'ECO4 - Install in progress',
+#             'ECO4 - Install cancelled',
+#             'ECO4 - partially installed',
+#             'ECO4 - Install downgrade to GBIS',
+#         ]
+#     ]
+#     # Add a note to flag any rows where both of the following hold:
+#     # (ECO4 - subject to ciga is greater than 0) and (ECO4 - passed CIGA is greater than 0)
+#     # These rows still have ECO4 properties needing a CIGA check
+#     facts_and_figures["Missed CIGA checks opportunity"] = None
+#     facts_and_figures["Missed CIGA checks opportunity"] = np.where(
+#         (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
+#         "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
+#             str) + " ECO4 properties needing a CIGA check",
+#         facts_and_figures["Missed CIGA checks opportunity"]
+#     )
+#
+#     facts_and_figures.to_csv("Facts and figures sample.csv")
+#
+#     # Re-arrange the columns
+#
+#     # Also sort ha_analysis_results by ha number
+#     ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
+#     ha_analysis_results = ha_analysis_results.sort_values("ha_number")
+#     ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
+#
+#     # We save 2 sheets
+#     # Automate creation of the excel
+#     # Create a Pandas Excel writer using XlsxWriter as the engine
+#     with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
+#         # Write each dataframe to a different worksheet without the index
+#         for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
+#                           (ha_analysis_results, 'Asset Identification')]:
+#
+#             df.to_excel(writer, sheet_name=sheet)
+#
+#             # Auto-adjust columns' width
+#             for i, width in enumerate(get_col_widths(df)):
+#                 writer.sheets[sheet].set_column(i, i, width)
+#
+#     # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their
+#     #               description, and what proportion of time they get identified via non-invasive surveys
+#
+#     # true_eco4_assets = []
+#     # ciga_dependent_assets = []
+#     # not_eligible = []
+#     # as_built_insulated = []
+#     # date_cols = {
+#     #     "HA39": "date_built",
+#     #     "HA14": "Built In Year",
+#     #     "HA6": "Construction Year",
+#     #     "HA1": "Build Date",
+#     #     "HA107": "YEAR BUILT"
+#     # }
+#     # for ha_name, data_objects in outputs.items():
+#     #     inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+#     #
+#     #     date_col = date_cols[ha_name]
+#     #     results_df = data_objects["results_df"].copy()
+#     #     df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename(
+#     #         columns={"row_meaning": "asset_identification_status", date_col: "date_built"}
+#     #     ).merge(
+#     #         results_df,
+#     #         how="left",
+#     #         right_on="row_id",
+#     #         left_on="asset_list_row_id"
+#     #     )
+#     #
+#     #     # take the true ECO4
+#     #     true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy()
+#     #     ciga_dependent = df[
+#     #         df["ECO Eligibility"].isin(
+#     #             [
+#     #                 "eco4 (subject to ciga)",
+#     #                 "failed ciga",
+#     #                 "eco4 - passed ciga"
+#     #             ]
+#     #         )
+#     #     ]
+#     #     insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy()
+#     #     # We convert date built to datetime
+#     #     try:
+#     #         insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])]
+#     #         insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year
+#     #         as_built_insulated.append(insulated_assumed)
+#     #     except Exception as e:
+#     #         print("oh well")
+#     #
+#     #     true_eco4_assets.append(true_eco4)
+#     #     ciga_dependent_assets.append(ciga_dependent)
+#     #
+#     # true_eco4_assets = pd.concat(true_eco4_assets)
+#     # ciga_dependent_assets = pd.concat(ciga_dependent_assets)
+#     # as_built_insulated = pd.concat(as_built_insulated)
+#     #
+#     # true_eco4_assets["walls"].value_counts(normalize=True)
+#     # ciga_dependent_assets["walls"].value_counts(normalize=True)
+#     #
+#     # from recommendations.recommendation_utils import extract_insulation_thickness
+#     #
+#     # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply(
+#     #     lambda x: extract_insulation_thickness(x)
+#     # )
+#     #
+#     # true_eco4_assets["e"] = true_eco4_assets.merge(
+#     #     pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]],
+#     #     how="left",
+#     #     left_on="roof",
+#     #     right_on="original_description"
+#     # )
+#     #
+#     # true_eco4_assets["sap"].mean()
+#     #
+#     # true_eco4_assets["insulation_thickness"].isin(
+#     #     ["250", "150", "200", "100", "75", "50"]
+#     # ).sum() / true_eco4_assets.shape[0]
+#     #
+#     # true_eco4_assets["insulation_thickness"].isin(
+#     #     ["100"]
+#     # ).sum() / true_eco4_assets.shape[0]
+#     #
+#     # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True)
 
-    Then, for any property that has NOT been identifid by Warmfront, we identify properties that look like they would
-    qualify under the strictest criteria, and mark these as potential additional opportunities.
 
-    :return:
-    """
+def get_propensity_model_data(
+    loader, cleaned, cleaning_data, created_at, photo_supply_lookup,
+    floor_area_decile_thresholds, pull_data=True
+):
+    # TODO: Set a seed!
+    model_data = []
+    for ha_name, data_assets in loader.data.items():
 
-    eco4_rate = 1710
-    gbis_rate = 600
+        logger.info("Processing HA: %s", ha_name)
+        if data_assets["survey_list"].empty:
+            continue
 
-    ha_analysis_results = []
-    ha_revenue_results = []
-    for ha_name, datasets in outputs.items():
+        number_sold = data_assets["survey_list"].shape[0]
+
+        # For each HA, we pull in the data required and store it in S3
+        asset_list = data_assets["asset_list"].copy()
+        # We determine the number of properties that we should select that are eligible
+        asset_list_size = asset_list.shape[0]
+        # Number eligible
+        n_eligibile = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0]
+        success_rate = n_eligibile / asset_list_size
+        needed_sample_size = np.ceil(number_sold / success_rate)
+        number_negative_samples = int(needed_sample_size - number_sold)
+
+        sold_asset_list_ids = data_assets["survey_list"]["asset_list_row_id"].tolist()
+        negative_sample_asset_list_ids = asset_list["asset_list_row_id"].sample(number_negative_samples).tolist()
+        sample_ids = sold_asset_list_ids + negative_sample_asset_list_ids
+
+        sample_asset_list = asset_list[asset_list["asset_list_row_id"].isin(sample_ids)]
+
+        # In order to have the most confidence, we should take just properties that have 1 EPC. We might need to
+        # cut down the number of properties that we include because of this
+        # Note: This is an imbalanced problem, so we will need to build a model that accommodates that
+
+        data = []
+        errors = []
+        for index, property_meta in tqdm(sample_asset_list.iterrows(), total=len(sample_asset_list)):
+
+            if property_meta["matching_postcode"] is None:
+                continue
+
+            property_type, built_form = get_property_type_and_built_form(
+                property_meta=property_meta, ha_name=ha_name
+            )
+
+            searcher = SearchEpc(
+                address1=str(property_meta["HouseNo"]),
+                postcode=property_meta["matching_postcode"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                full_address=property_meta["matching_address"]
+            )
+            searcher.ordnance_survey_client.property_type = property_type
+            searcher.ordnance_survey_client.built_form = built_form
+            searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is None:
+                continue
+
+            if searcher.newest_epc.get("estimated"):
+                # We insert the row ID as our proxy for UPRN
+                searcher.newest_epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
+
+            newest_epc = searcher.newest_epc
+            older_epcs = searcher.older_epcs
+            full_sap_epc = searcher.full_sap_epc
+
+            # If we have more than 1 EPC, for the moment we just skip the property
+            if older_epcs or full_sap_epc:
+                continue
+            try:
+
+                # We clean up the data
+                epc_records = {
+                    'original_epc': newest_epc.copy(),
+                    'full_sap_epc': full_sap_epc.copy(),
+                    'old_data': older_epcs.copy(),
+                }
+
+                epc_record = EPCRecord(
+                    epc_records=epc_records,
+                    run_mode="newdata",
+                    cleaning_data=cleaning_data
+                )
+
+                # If the EPC record was prepared without error, we store it with the asset list identifiers
+                data.append(
+                    {
+                        "ECO Eligibility": property_meta["ECO Eligibility"],
+                        "asset_list_row_id": property_meta["asset_list_row_id"],
+                        **epc_record.get("prepared_epc")
+                    }
+                )
+            except Exception as e:
+                errors.append(
+                    {
+                        "error": str(e),
+                        "asset_list_row_id": property_meta["asset_list_row_id"],
+                        "matching_postcode": property_meta["matching_postcode"],
+                        "matching_address": property_meta["matching_address"]
+                    }
+                )
+
+        data = pd.DataFrame(data)
+        # We store the results in S3 as a pickle
+        save_pickle_to_s3(
+            data=data,
+            bucket_name="retrofit-datalake-dev",
+            s3_file_name=f"propensity_model_data/{ha_name}/train.pickle"
+        )
+
+        # Store the errors
+        if errors:
+            save_pickle_to_s3(
+                data=errors,
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"propensity_model_data/{ha_name}/errors.pickle"
+            )
+
+        model_data.append(data)
+
+    return model_data
+
+
+def conversion_model(loader):
+    # Read in the model data
+
+    model_data = []
+    for ha_name in loader.data.keys():
+        try:
+            picked = read_pickle_from_s3(
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"propensity_model_data/{ha_name}/train.pickle"
+            )
+            data = pd.DataFrame(picked)
+
+            # We merge on the sales data
+            sales_data = loader.data[ha_name]["survey_list"].copy()
+            data = data.merge(
+                sales_data[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            data["ha_name"] = ha_name
+
+        except Exception as e:
+            logger.error("Error reading in the data for %s", ha_name)
+            continue
+
+        model_data.append(data)
+
+    model_data = pd.concat(model_data)
+
+    model_data["response"] = model_data["installation_status"].isin(
+        [
+            "ECO4 - in progress",
+            "ECO4 - installed"
+        ]
+    ).astype(int)
+
+    # Because of how we pulled the data, we need to re-balance the sample
+    ha_names = model_data["ha_name"].unique()
+
+    balanced_sample = []
+    for ha_name in ha_names:
+        df = model_data[model_data["ha_name"] == ha_name]
+        positive_samples = df[df["response"] == 1]
+        negative_samples = df[df["response"] != 1]
 
         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
-        # TODO: This is placeholder because we don't have the schemes that the properties have been qualified for
-        #       yet
-        #
-        import random
-        randomly_allocated_schemes = random.choices(["ECO4", "GBIS"], k=inputs["asset_list"].shape[0])
-        inputs["asset_list"]["randomly_allocated_schemes"] = randomly_allocated_schemes
-        inputs["asset_list"]["funding_scheme"] = None
-        inputs["asset_list"]["funding_scheme"] = np.where(
-            inputs["asset_list"]["row_meaning"] == "identified potential eco works (CWI)",
-            inputs["asset_list"]["randomly_allocated_schemes"],
-            inputs["asset_list"]["funding_scheme"]
-        )
+        asset_list = inputs["asset_list"].copy()
+        asset_list_size = asset_list.shape[0]
+        n_eligibile = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0]
+        success_rate = n_eligibile / asset_list_size
+        needed_sample_size = np.ceil(positive_samples.shape[0] / success_rate)
+        number_negative_samples = int(needed_sample_size - positive_samples.shape[0])
+        negative_samples_subset = negative_samples.sample(number_negative_samples)
 
-        # TODO: Also temp, just for HA 6
-        if ha_name == "ha_6":
-            inputs["survey_list"]["funding_scheme"] = None
-            inputs["survey_list"]["funding_scheme"] = np.where(
-                inputs["survey_list"][
-                    'AFFORDABLE WARMTH                 OR EPC FOR HOUSING ASSOCIATION '] == "AFFORDABLE WARMTH",
-                "ECO4",
-                "GBIS"
-            )
+        output = pd.concat([positive_samples, negative_samples_subset])
 
-        # End placholder
+        balanced_sample.append(output)
 
-        results_df = datasets["results_df"].copy()
+    balanced_sample = pd.concat(balanced_sample)
 
-        analysis_data = inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename(
-            columns={"row_meaning": "asset_identification_status"}
-        ).merge(
-            results_df,
-            how="left",
-            right_on="row_id",
-            left_on="asset_list_row_id"
-        )
-
-        # We now merge the survey list onto the analysis data and remove anything that is sold, to give us just what is
-        # remaining
-
-        if inputs["matched_lookup"] is not None:
-            analysis_data = analysis_data.merge(
-                inputs["matched_lookup"], how="left", on="asset_list_row_id"
-            )
-            # Drop any rows that have a survey_list_row_id
-            analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])]
-
-        # If we have a survey list, we merge this onto the results
-        n_properties_in_asset_list = analysis_data["asset_list_row_id"].nunique()
-
-        properties_sold = (
-            inputs["survey_list"].groupby("funding_scheme")["survey_list_row_id"].nunique().reset_index() if
-            inputs["survey_list"] is not None else pd.DataFrame(columns=["funding_scheme"])
-        )
-        properties_sold_eco4 = (
-            properties_sold[properties_sold["funding_scheme"] == "ECO4"]["survey_list_row_id"].values[0] if
-            (not properties_sold.empty) and ("ECO4" in properties_sold["funding_scheme"].values) else 0
-        )
-        properties_sold_gbis = (
-            properties_sold[properties_sold["funding_scheme"] == "GBIS"]["survey_list_row_id"].values[0] if
-            (not properties_sold.empty) and ("GBIS" in properties_sold["funding_scheme"].values) else 0
-        )
-
-        # We now calculate the number of remaining properties, by scheme
-        remaining_properties = analysis_data[
-            analysis_data["asset_identification_status"] == "identified potential eco works (CWI)"
-            ].copy()
-        remaining_properties["prospect_type"] = None
-
-        remaining_properties_by_scheme = (
-            remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index()
-        )
-
-        n_remaining_properties_eco4 = remaining_properties_by_scheme[
-            remaining_properties_by_scheme["funding_scheme"] == "ECO4"
-            ]["asset_list_row_id"].values[0]
-
-        n_remaining_properties_gbis = remaining_properties_by_scheme[
-            remaining_properties_by_scheme["funding_scheme"] == "GBIS"
-            ]["asset_list_row_id"].values[0]
-
-        # For the remaining properties, we use the results of the eligibility process to classify the property into
-        # one of multiple categories
-        #
-        # For properties that have been identified as ECO4
-        # 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because
-        #    Warmfront regularly re-surveys properties which then fall within the SAP requirement
-        #    - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties
-        #      here and re-surveying is a common practicce by Warmfront. Additionally, many of the social homes have
-        #      very old EPCs which may score lower when re-done
-        # 2) Meets Fabric requirements, not SAP
-        #    Warmfront has identified the property as eligible, but the EPC is not D or below. We consider this but
-        #    label is separately as not a strict
-        # 3) Subject to CIGA check - Meets loft conditions but shows a filled cavity.
-        #    - we don't have a SAP constraint here because the EPC is (currently) showing what the property might
-        #      actually look like after retrofit and so the EPC currently being a C or above means little, because
-        #      the updated EPC, showing an empty cavity, could bring the property within
-        # 4) Loft insulation too thick - Meets empty cavity but shows a loft with between 101 and 270mm insulation.
-        #   - No SAP constraint, for the same reason as in category 2)
-        # 5) Looks like GBIS instead
-        # 6) Does not look like ECO4 candidate
-        #
-        # For properties that have been identified as GBIS
-        # 1) Strict GBIS candidates
-        # 2) Properties that actually look like strict GBIS candidates
-        # 3) Subject to CIGA check - Filled cavity
-        # 4) Does not look like a GBIS candidate
-
-        remaining_eco4_df = remaining_properties[
-            remaining_properties["funding_scheme"] == "ECO4"
-            ].copy()
-
-        ####################################
-        # ECO4
-        ####################################
-
-        # 1) We identify this if:
-        #   - remaining_properties["eco4_eligible"] == True
-
-        remaining_eco4_df["prospect_type"] = np.where(
-            (remaining_eco4_df["eco4_eligible"] == True),
-            "strict ECO4",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 2) Meets fabric requirements
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                (remaining_eco4_df["eco4_message"] == "sap too high") &
-                remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) &
-                pd.isnull(remaining_eco4_df["prospect_type"])
-            ),
-            "ECO4 if SAP downgrade",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 3) We identify this if it has a filled cavity but meets the loft conditions
-        # TODO: Consider if we should also allow 100-270mm or if we should add some slight tolerance (e.g. 150mm)
-        #       to account for measurement error
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
-            ),
-            "ECO4 - Filled cavity - subject to CIGA check",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 4) We identify this by ensuring the cavity if empty or partial, and the loft has between 101 and 270mm
-        remaining_eco4_df["prospect_type"] = np.where(
-            (
-                remaining_eco4_df["eligibility_cavity_type"].isin(["empty", "partial"]) &
-                remaining_eco4_df["eligibility_loft_type"].isin(["100-270mm"])
-            ),
-            "ECO4 prospect - empty cavity, loft insulation below regulation",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 5) Looks like GBIS instead
-        remaining_eco4_df["prospect_type"] = np.where(
-            (remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]),
-            "Looks like GBIS",
-            remaining_eco4_df["prospect_type"]
-        )
-
-        # 6) This is everything else (i.e. both the cavity is full and the loft insulation is above 100mm)
-        remaining_eco4_df["prospect_type"] = remaining_eco4_df["prospect_type"].fillna(
-            "Does not look like ECO4 candidate"
-        )
-
-        ####################################
-        # GBIS
-        ####################################
-
-        remaining_gbis = remaining_properties[
-            remaining_properties["funding_scheme"] == "GBIS"
-            ].copy()
-
-        # 1) Strict GBIS candidates
-        remaining_gbis["prospect_type"] = np.where(
-            (
-                (remaining_gbis["gbis_eligible"] == True) & (remaining_gbis["eco4_eligible"] == False)
-            ),
-            "strict GBIS",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 2) GBIS candidates that look like strict ECO4 candidates
-        remaining_gbis["prospect_type"] = np.where(
-            (remaining_gbis["eco4_eligible"] == True),
-            "GBIS - Upgradable to ECO4",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 3) Subject to CIGA check - Filled cavity
-        remaining_gbis["prospect_type"] = np.where(
-            (
-                remaining_gbis["eligibility_cavity_type"].isin(["full"]) &
-                pd.isnull(remaining_gbis["prospect_type"])
-            ),
-            "GBIS - Filled cavity - subject to CIGA check",
-            remaining_gbis["prospect_type"]
-        )
-
-        # 4) Everything else
-        remaining_gbis["prospect_type"] = remaining_gbis["prospect_type"].fillna(
-            "Does not look like GBIS candidate"
-        )
-
-        ####################################
-        # Surplus properties
-        ####################################
-
-        # Take properties that were not identified by Warmfront and identify those that look like they would qualify
-        # under the strictest criteria
-        surplus_df = analysis_data[
-            analysis_data["asset_identification_status"] != "identified potential eco works (CWI)"
-            ].copy()
-
-        eco4_surplus = surplus_df[
-            (
-                (surplus_df["eco4_eligible"] == True) & (surplus_df["eco4_message"] == "subject to post retrofit sap") &
-                (
-                    surplus_df["eligibility_classification"].isin(
-                        ["high confidence", "highest confidence", "medium confidence"]
-                    )
-                )
-            )
-        ].copy()
-
-        gbis_surplus = surplus_df[
-            (
-                (surplus_df["gbis_eligible"] == True) & (surplus_df["eco4_eligible"] == False) & (
-                surplus_df["eligibility_cavity_type"].isin(["empty", "partial"])
-            )
-            )
-        ].copy()
-
-        # Perform some checks to make sure we have all of the values
-        remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict()
-        if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]):
-            raise ValueError(
-                "Number of remaining properties does not match the number of properties in remaining ECO4 dict"
-            )
-
-        remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict()
-        if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]):
-            raise ValueError(
-                "Number of remaining properties does not match the number of properties in remaining GBIS dict"
-            )
-
-        to_append = {
-            "ha_name": ha_name,
-            "n_properties_in_asset_list": n_properties_in_asset_list,
-            ############
-            # ECO4
-            ############
-            "properties_sold_eco4": properties_sold_eco4,
-            "n_remaining_properties_eco4": n_remaining_properties_eco4,
-            **remaining_eco4_dict,
-            ############
-            # GBIS
-            ############
-            "properties_sold_gbis": properties_sold_gbis,
-            "n_remaining_properties_gbis": n_remaining_properties_gbis,
-            **remaining_gbis_dict,
-            ############
-            # GBIS
-            ############
-            "n_eco4_surplus": eco4_surplus.shape[0],
-            "n_gbis_surplus": gbis_surplus.shape[0],
-        }
-
-        ha_analysis_results.append(to_append)
-
-        revenue_to_append = {
-            "ha_name": ha_name,
-            "£ Remaining from asset list": (
-                n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate
-            ),
-            "Of which: Strict": (
-                to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate +
-                to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate
-            ),
-            "Of which: Subject to CIGA": (
-                to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate +
-                to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate
-            ),
-            "Of which: Prospect, not perfect strict prospect": (
-                to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate +
-                to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate
-            ),
-            "Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate,
-            "Of which: Does not look like prospect": (
-                to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate +
-                to_append.get("Does not look like GBIS candidate", 0) * gbis_rate
-            ),
-            "Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate,
-            "Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate)
-        }
-
-        # Perform a quick check:
-        if revenue_to_append["£ Remaining from asset list"] - (
-            revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] +
-            revenue_to_append["Of which: Prospect, not perfect strict prospect"] +
-            revenue_to_append["Of which: Potential downgrade to GBIS"] +
-            revenue_to_append["Of which: Does not look like prospect"]
-        ) > 1:
-            raise ValueError("Error between top level revenue figures and breakdown - investigate me")
-
-        ha_revenue_results.append(revenue_to_append)
-
-    ha_analysis_results = pd.DataFrame(ha_analysis_results)
-    ha_revenue_results = pd.DataFrame(ha_revenue_results)
-
-    # Automate creation of the excel
-    # Create a Pandas Excel writer using XlsxWriter as the engine
-    with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer:
-        # Write each dataframe to a different worksheet without the index
-        for df, sheet in [(ha_revenue_results, 'Total Revenue'),
-                          (ha_analysis_results, 'By ECO4 and GBIS')]:
-
-            df.to_excel(writer, sheet_name=sheet, index=False)
-
-            # Auto-adjust columns' width
-            for i, width in enumerate(get_col_widths(df)):
-                writer.sheets[sheet].set_column(i, i, width)
-
-
-def app():
-    """
-    This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
-    Only HA 6 has surveys
-    :return:
-    """
-
-    use_cache = False
-
-    # List all of the data in the folder
-    directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
-
-    files = {
-        "ha_1": {
-            "asset_list": {
-                "filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx",
-                "sheetname": "Energy data"
-            }
-        },
-        "ha_6": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx",
-                "sheetname": "HA 6"
-            },
-            "survey_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx",
-                "sheetname": "HA 6"
-            }
-        },
-        "ha_14": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx",
-                "sheetname": "HA 14"
-            }
-        },
-        "ha_39": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx",
-                "sheetname": "Sheet1"
-            }
-        },
-        "ha_107": {
-            "asset_list": {
-                "filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx",
-                "sheetname": "HA 107"
-            }
-        }
-    }
-
-    loader = DataLoader(directories, use_cache)
-    loader.load()
-
-    # TODO: We probably need to make sure that we have all of the columns that we need
-
-    # We load in the additional data required to perform the analysis
-
-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
+    # We work with a small sample
+    # Drop the ECO Eligibility column and installation_status column
+    # We keep the ID column
+    balanced_sample = balanced_sample.drop(
+        columns=['ECO Eligibility', 'asset_list_row_id', 'address', 'uprn_source', 'address3', 'local_authority_label',
+                 'county', 'postcode', 'constituency', 'local_authority', 'inspection_date', 'address1',
+                 'constituency_label', 'building_reference_number', 'address2', 'posttown', 'lodgement_datetime',
+                 'uprn', 'lodgement_date', 'lmk_key', 'installation_status', 'ha_name']
     )
-    cleaned = msgpack.unpackb(cleaned, raw=False)
 
+    # POC model
+    df = balanced_sample.copy()
+    # Fill missing values with column means, if any exist
+    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
+    df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())
+
+    categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+    df[categorical_cols] = df[categorical_cols].fillna("other")
+
+    # Reduce the number of categories to a specific number and the rest to other
+    max_n_categories = 10
+    for col in categorical_cols:
+        top_categories = df[col].value_counts().nlargest(max_n_categories).index
+        df[col] = df[col].where(df[col].isin(top_categories), other="other")
+
+    # Use a model based approach to feature selection
+    import xgboost as xgb
+    from sklearn.model_selection import train_test_split
+
+    # The outcome column is named 'response'
+    X = df.drop(columns=['response'])
+    y = df['response']
+    df["low_energy_fixed_light_count"].va
+
+    # Encoding categorical variables if not already done
+    X = pd.get_dummies(X, drop_first=True)
+
+    # Splitting the data into train and test sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize an XGBoost classifier
+    model = xgb.XGBClassifier()
+
+    # Fit the model
+    model.fit(X_train, y_train)
+
+    # Get feature importances
+    feature_importances = model.feature_importances_
+
+    # Map feature importances to their corresponding column names
+    feature_importance_dict = {feature: importance for feature, importance in zip(X.columns, feature_importances)}
+
+    # Sort features by importance
+    sorted_features = sorted(feature_importance_dict.items(), key=lambda item: item[1], reverse=True)
+
+    # Display sorted features
+    for feature, importance in sorted_features:
+        print(f"{feature}: {importance}")
+
+
+def patch_cleaned(cleaned):
     # Patch to handle the a missing description
     cleaned["floor-description"].extend(
         [
@@ -1354,7 +4923,6 @@ def app():
         ]
     )
 
-    # We treat unknown loft insulation as no insulation
     cleaned["roof-description"].extend(
         [
             {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
@@ -1365,6 +4933,76 @@ def app():
         ]
     )
 
+    cleaned["roof-description"].extend(
+        [
+            {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
+             'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_pitched': True,
+             'is_roof_room': False,
+             'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True,
+             'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'}
+        ]
+    )
+
+    cleaned["roof-description"].extend(
+        [
+            {'original_description': 'Pitched, 300+mm loft insulation',
+             'clean_description': 'Pitched, 300+ mm loft insulation', 'thermal_transmittance': None,
+             'thermal_transmittance_unit': None, 'is_pitched': True, 'is_roof_room': False, 'is_loft': True,
+             'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+             'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': '300+'
+             }
+        ]
+    )
+
+    thermal_transmittance_values = list(np.arange(0, 2, 0.01))
+    for ttv in thermal_transmittance_values:
+        ttv_roundeded = round(ttv, 2)
+        # We look for an instance of that thermal transmittance value
+        rec = [
+            x for x in cleaned["roof-description"] if
+            (x["thermal_transmittance"] == ttv_roundeded) and "Average thermal transmittance" in x["clean_description"]
+        ]
+
+        if rec:
+            continue
+        else:
+            # We patch the record
+            cleaned["roof-description"].extend(
+                [{'original_description': f'Average thermal transmittance {ttv_roundeded} W/m-¦K',
+                  'clean_description': f'Average thermal transmittance {ttv_roundeded} w/m-¦k',
+                  'thermal_transmittance': ttv_roundeded,
+                  'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+                  'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+                  'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
+            )
+
+    # We also patch a funny unit value we found
+    for ttv in thermal_transmittance_values:
+        ttv_rounded = round(ttv, 2)
+        # We look for an instance of that thermal transmittance value
+        rec = [
+            x for x in cleaned["roof-description"] if
+            (x["thermal_transmittance"] == ttv_rounded) and "Average thermal transmittance" in x["clean_description"]
+            and x["thermal_transmittance_unit"] == "w/m?K"
+        ]
+
+        if rec:
+            continue
+        else:
+            # We patch the record
+            ttv_string = str(ttv_rounded)
+            if len(ttv_string) == 3:
+                ttv_string = f"{ttv_string}0"
+
+            cleaned["roof-description"].extend(
+                [{'original_description': f'Average thermal transmittance {ttv_string} W/m?K',
+                  'clean_description': f'Average thermal transmittance {ttv_string} w/m-¦k',
+                  'thermal_transmittance': ttv_rounded,
+                  'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+                  'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+                  'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
+            )
+
     # Patch mainheatcont-description
     cleaned["mainheatcont-description"].extend(
         [
@@ -1381,54 +5019,1826 @@ def app():
             x["another_property_below"] = True
             x["thermal_transmittance"] = 0
 
-    cleaning_data = read_dataframe_from_s3_parquet(
-        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+    return cleaned
+
+
+def calculate_eco4_post_ciga(
+    eligiblity_counts, input_data, ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate,
+    eco4_rate, archetype_conversion_rate
+):
+    remaining_needing_ciga_check = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") &
+        ~eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype")
+        ]["count"].sum()
+
+    remaining_needing_ciga_and_archetype_check = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") &
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype")
+        ]["count"].sum()
+    # We scale this down by the archetype_conversion_rate, and add this on to the remaining_needing_ciga_check
+    remaining_needing_ciga_and_archetype_check_passed = np.round(
+        remaining_needing_ciga_and_archetype_check * archetype_conversion_rate
     )
 
-    created_at = datetime.now().isoformat()
+    remaining_needing_ciga_check += remaining_needing_ciga_and_archetype_check_passed
 
-    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+    eco4_no_ciga_needed = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4"
+        ]["count"].sum()
 
-    outputs = get_epc_data(
-        loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=False
+    eco4_no_ciga_archetype_needed = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4 (subject to archetype)"
+        ]["count"].sum()
+    eco4_no_ciga_archetype_needed_passed = np.round(
+        eco4_no_ciga_archetype_needed * archetype_conversion_rate
     )
 
-    # for ha_name, datasets in outputs.items():
-    #     datasets["results_df"] = datasets["results_df"].drop(
-    #         columns=["eligibility_cavity_type", "eligibility_loft_type"]
-    #     )
+    eco4_no_ciga_needed += eco4_no_ciga_archetype_needed_passed
+
+    failed_archetype_check = int(
+        remaining_needing_ciga_and_archetype_check +
+        eco4_no_ciga_archetype_needed -
+        remaining_needing_ciga_and_archetype_check_passed -
+        eco4_no_ciga_archetype_needed_passed
+    )
+
+    has_ciga_check = not input_data["ciga_list"].empty
+    if has_ciga_check:
+
+        eco4_ciga_passed = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga"
+            ]["count"].sum()
+
+        eco4_confirmed_ciga_failures = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "failed ciga"
+            ]["count"].sum()
+
+        eco4_no_ciga_needed_or_ciga_passed = eco4_no_ciga_needed + eco4_ciga_passed
+
+        eco4_confirmed = np.round(
+            (eco4_no_ciga_needed * ha_eco4_to_sale_rate) +
+            (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
+        )
+
+        eco4_no_ciga_needed_cancellations = int(eco4_no_ciga_needed_or_ciga_passed - eco4_confirmed)
+
+        if remaining_needing_ciga_check > 0:
+            # We update the eco4 post ciga with the converted remaining
+            eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+
+            eco4_remaining_forecast = np.round(
+                eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
+            )
+            eco4_ciga_needed_cancellations = eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast
+            eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
+            eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
+        else:
+            eco4_remaining_forecast = 0
+            eco4_estimated_ciga_failures = 0
+            eco4_ciga_needed_cancellations = 0
+            eco4_post_ciga = eco4_confirmed
+
+        eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations
+    else:
+        eco4_confirmed_ciga_failures = 0
+        # Multiply by sale conversion
+        eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
+        eco4_no_ciga_cancellations = int(eco4_no_ciga_needed - eco4_confirmed)
+        eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
+
+        eco4_remaining_forecast = np.round(
+            eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
+        )
+        eco4_ciga_cancellations = int(eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast)
+        eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
+
+        eco4_expected_cancellations = eco4_no_ciga_cancellations + eco4_ciga_cancellations
+
+    eco4_post_ciga = int(eco4_post_ciga)
+    eco4_remaining_forecast = int(eco4_remaining_forecast)
+    eco4_confirmed = int(eco4_confirmed)
+
+    results = {
+        # Counts
+        "ECO4 - post CIGA - #": eco4_post_ciga,
+        "Of which confirmed - #": eco4_confirmed,
+        "Of which forecast - #": eco4_remaining_forecast,
+        # Revenue
+        "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate,
+        "Of which confirmed - £": eco4_confirmed * eco4_rate,
+        "Of which forecast - £": eco4_remaining_forecast * eco4_rate,
+        # Archetype check failures
+        "Estimated total - failed archetype check - #": failed_archetype_check,
+        "Estimated total - failed archetype check - £": failed_archetype_check * eco4_rate,
+        # Ciga failures
+        "Estimated total - failed CIGA": int(eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures),
+        "Confirmed CIGA failures": eco4_confirmed_ciga_failures,
+        "Estimated CIGA failures": int(eco4_estimated_ciga_failures),
+        # Ciga failures cost
+        "Estimated total - failed CIGA - £": int(
+            (eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures) * eco4_rate
+        ),
+        "Confirmed CIGA failures - £": int(eco4_confirmed_ciga_failures * eco4_rate),
+        "Estimated CIGA failures - £": int(eco4_estimated_ciga_failures * eco4_rate),
+        # Expected cancellations
+        "Expected cancellations - #": eco4_expected_cancellations,
+        "Expected cancellations - £": eco4_expected_cancellations * eco4_rate
+    }
+
+    return results
+
+
+def forecast_remaining_sales(loader):
+    # Assumptions:
+    # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
+    # and I don't want the numbers to change too much depending on the CIGA conversion rate
+    maximum_ciga_conversion = 0.75
+
+    # This is a hard limit to the allowed conversion rates to final sale. These are typically very
+    # high but there are some anomalies, amongst surveys that are early on
+    sales_conversion_lower_bound = 0.8
+
+    gbis_rate = 600
+    eco4_rate = 1710
+
+    # Based on ONS https://www.ons.gov.uk/peoplepopulationandcommunity/housing/bulletins/housingenglandandwales
+    # /census2021
+    # there are 5.7 million terraced properties in the UK, of the 19.3 million houses or bungalows. We therefore apply
+    # a 30% discount to homes that are dependent on an archetype check, since around 30% of them will be mid terraced
+    # This 30% is slightly harsh, but we choose to be conservative
+    # Therefore, the archetype check conversion rate is 70%
+    archetype_conversion_rate = 0.7
+
+    # 1) Calculate the conversion rate from passed CIGA to actual sale
+    converted_ciga_jobs = []
+    for ha_name, input_data in loader.data.items():
+        asset_list = input_data["asset_list"].copy()
+        survey_list = input_data["survey_list"].copy()
+
+        if survey_list.empty:
+            continue
+
+        ciga_dependent_assets = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 - passed ciga"
+            ]
+
+        # These are now the ciga dependent assets at installation
+        ciga_dependent_assets_at_installation = ciga_dependent_assets.merge(
+            survey_list[["asset_list_row_id", "installation_status"]],
+            how="inner",
+            on="asset_list_row_id"
+        )
+
+        # We then calculate how many get cancelled
+        ciga_dependent_assets_sold = ciga_dependent_assets_at_installation[
+            ciga_dependent_assets_at_installation["installation_status"].isin(
+                [
+                    "ECO4 - installed", "ECO4 - in progress"
+                ]
+            )
+        ]
+
+        ciga_dependent_assets_failed = ciga_dependent_assets_at_installation[
+            ~ciga_dependent_assets_at_installation["installation_status"].isin(
+                [
+                    "ECO4 - installed", "ECO4 - in progress"
+                ]
+            )
+        ]
+
+        converted_ciga_jobs.append(
+            {
+                "HA Name": ha_name,
+                "# Ciga dependent at installation": ciga_dependent_assets_at_installation.shape[0],
+                "# Ciga dependent successfully installed": ciga_dependent_assets_sold.shape[0],
+                "# Ciga dependent failed install": ciga_dependent_assets_failed.shape[0]
+            }
+        )
+
+    converted_ciga_jobs = pd.DataFrame(converted_ciga_jobs)
+
+    # We calculate a ciga pass to install conversion rate
+    median_ciga_pass_to_install = (
+        converted_ciga_jobs["# Ciga dependent successfully installed"].sum() /
+        converted_ciga_jobs["# Ciga dependent at installation"].sum()
+    )
+
+    # 2) Calculate the conversion rate from CIGA dependent to ciga passed
+    ciga_passrates = []
+    for ha_name, input_data in loader.data.items():
+
+        # If we don't have a ciga list, we can't do anything
+        if input_data["ciga_list"].empty:
+            continue
+
+        # Select the assets whose CIGA check has completed (either passed or failed)
+        asset_list = input_data["asset_list"].copy()
+
+        ciga_completed_assets = asset_list[
+            asset_list["ECO Eligibility"].isin(
+                [
+                    "eco4 - passed ciga",
+                    "failed ciga"
+                ]
+            )
+        ]
+
+        ciga_passed = ciga_completed_assets[
+            ciga_completed_assets["ECO Eligibility"].isin(
+                [
+                    "eco4 - passed ciga"
+                ]
+            )
+        ]
+
+        ciga_passrates.append(
+            {
+                "Ha Name": ha_name,
+                "# CIGA dependent": ciga_completed_assets.shape[0],
+                "# CIGA passed": ciga_passed.shape[0],
+            }
+        )
+
+    ciga_passrates = pd.DataFrame(ciga_passrates)
+
+    median_ciga_success_rate = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
+
+    # 3) Calculate the conversion rate of an ECO4 and a GBIS job, that doesn't need ciga, to install
+    eco4_ciga_independent_to_install = []
+    gbis_to_install = []
+    for ha_name, input_data in loader.data.items():
+        asset_list = input_data["asset_list"].copy()
+        survey_list = input_data["survey_list"].copy()
+
+        if survey_list.empty:
+            continue
+
+        # For properties that were identified as a typical ECO4 job, we calculate the number of properties that
+        # installed
+        # vs cancelled
+
+        typical_eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"]
+        typical_gbis = asset_list[asset_list["ECO Eligibility"] == "gbis"]
+
+        # Merge on the surveys
+        typical_eco4_installed = typical_eco4.merge(
+            survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id"
+        )
+
+        if not typical_eco4_installed.empty:
+            typical_eco4_sold = typical_eco4_installed[
+                typical_eco4_installed["installation_status"].isin(
+                    [
+                        "ECO4 - installed", "ECO4 - in progress"
+                    ]
+                )
+            ]
+
+            eco4_ciga_independent_to_install.append(
+                {
+                    "Ha Name": ha_name,
+                    "# ECO4 at install stage": typical_eco4_installed.shape[0],
+                    "# ECO4 successfully installed": typical_eco4_sold.shape[0]
+                }
+            )
+
+        typical_gbis_installed = typical_gbis.merge(
+            survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id"
+        )
+        if not typical_gbis_installed.empty:
+            typical_gbis_sold = typical_gbis_installed[
+                typical_gbis_installed["installation_status"].isin(
+                    [
+                        "GBIS - in progress", "GBIS - installed"
+                    ]
+                )
+            ]
+
+            gbis_to_install.append(
+                {
+                    "Ha Name": ha_name,
+                    "# GBIS at install stage": typical_gbis_installed.shape[0],
+                    "# GBIS successfully installed": typical_gbis_sold.shape[0]
+                }
+            )
+
+    eco4_ciga_independent_to_install = pd.DataFrame(eco4_ciga_independent_to_install)
+    gbis_to_install = pd.DataFrame(gbis_to_install)
+
+    eco4_ciga_independent_to_install["conversion"] = (
+        eco4_ciga_independent_to_install["# ECO4 successfully installed"] /
+        eco4_ciga_independent_to_install["# ECO4 at install stage"]
+    )
+    eco4_ciga_independent_to_install_clipped = eco4_ciga_independent_to_install[
+        eco4_ciga_independent_to_install["conversion"] >= sales_conversion_lower_bound
+        ]
+
+    gbis_to_install["conversion"] = (
+        gbis_to_install["# GBIS successfully installed"] /
+        gbis_to_install["# GBIS at install stage"]
+    )
+    gbis_to_install_clipped = gbis_to_install[
+        gbis_to_install["conversion"] >= sales_conversion_lower_bound
+        ]
+
+    median_eco4_to_install = (
+        eco4_ciga_independent_to_install_clipped["# ECO4 successfully installed"].sum() /
+        eco4_ciga_independent_to_install_clipped["# ECO4 at install stage"].sum()
+    )
+
+    median_gbis_to_install = (
+        gbis_to_install_clipped["# GBIS successfully installed"].sum() /
+        gbis_to_install_clipped["# GBIS at install stage"].sum()
+    )
+
+    # Produce the final output
+    december_figures = loader.december_figures.copy()
+    december_figures = december_figures.fillna(0)
+    # If a remaining figure is negative, it means more was actually sold than was initially estimated, so we
+    # clip remaining to 0 (applied to both ECO4 and GBIS)
+    december_figures["ECO4 remaining"] = np.where(
+        december_figures["ECO4 remaining"] < 0, 0, december_figures["ECO4 remaining"]
+    )
+    december_figures["GBIS remaining"] = np.where(
+        december_figures["GBIS remaining"] < 0, 0, december_figures["GBIS remaining"]
+    )
+
+    results = []
+    for ha_name, input_data in loader.data.items():
+
+        # Original warmfront figures - ECO4
+        original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
+        if original_warmfront_estimates.empty:
+            # Append an empty row
+            original_warmfront_estimates = december_figures.head(1).copy()
+            for k in original_warmfront_estimates.columns:
+                original_warmfront_estimates[k] = 0
+            original_warmfront_estimates["HA Name"] = ha_name
+
+        original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
+        original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
+        original_warmfront_sold_eco4 = (
+            original_warmfront_estimates["No. of Tech surveys complete - Eco 4"].values[0] * eco4_rate
+        )
+
+        original_warmfront_eco4_revenue = original_warmfront_eco4 * eco4_rate
+        original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
+        original_warmfront_sold_gbis = (
+            original_warmfront_estimates["No. of Tech surveys complete - GBIS"].values[0] * gbis_rate
+        )
+
+        # Original warmfront figures - GBIS
+
+        original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
+        original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
+
+        original_warmfront_gbis_revenue = (
+            original_warmfront_gbis * gbis_rate
+        )
+        original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate
+
+        # Asset list - ECO4
+        asset_list = input_data["asset_list"].copy()
+        survey_list = input_data["survey_list"].copy()
+
+        if survey_list.empty:
+            asset_list_remaining = asset_list.copy()
+        else:
+            # For HA6, there are a small number of postcodes that do not match to any item in the asset list
+            survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
+            asset_list_remaining = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation status has gone to installation, and therefore is not remaining
+            asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
+            asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
+
+        eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
+        eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
+
+        eco4_pre_ciga = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"].isin(
+                [
+                    "eco4",
+                    "eco4 (subject to ciga)",
+                    "eco4 - passed ciga",
+                    "failed ciga",
+                    "eco4 (subject to ciga) (subject to archetype)",
+                    "eco4 (subject to archetype)"
+                ]
+            )
+        ]["count"].sum()
+
+        eco4_pre_ciga_remaining = eligiblity_counts_remaining[
+            eligiblity_counts_remaining["ECO Eligibility"].isin(
+                [
+                    "eco4",
+                    "eco4 (subject to ciga)",
+                    "eco4 - passed ciga",
+                    "failed ciga",
+                    "eco4 (subject to ciga) (subject to archetype)",
+                    "eco4 (subject to archetype)"
+                ]
+            )
+        ]["count"].sum()
+
+        eco4_pre_ciga_revenue = eco4_pre_ciga * eco4_rate
+        eco4_pre_ciga_remaining_revenue = eco4_pre_ciga_remaining * eco4_rate
+
+        # Total Eligible - this is what passed ciga checks + strict. If we don't have what passed CIGA, we estimate
+        # We check if the HA has done a CIGA check. Also, if we have assets dormant at CIGA, we estimate what will
+        # convert
+        # We estimate a conversion for anything left post CIGA
+        ha_ciga_conversion = ciga_passrates[ciga_passrates["Ha Name"] == ha_name]
+        if not ha_ciga_conversion.empty:
+            ha_ciga_conversion_rate = (
+                ha_ciga_conversion["# CIGA passed"].values[0] / ha_ciga_conversion["# CIGA dependent"].values[0]
+            )
+        else:
+            ha_ciga_conversion_rate = (
+                median_ciga_success_rate if median_ciga_success_rate <= maximum_ciga_conversion else
+                maximum_ciga_conversion
+            )
+
+        # We also need the ha ciga passed to install success rate
+        ha_ciga_pass_to_sale = converted_ciga_jobs[converted_ciga_jobs["HA Name"] == ha_name]
+        if not ha_ciga_pass_to_sale.empty and ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0] != 0:
+            ha_ciga_pass_to_sale_rate = (
+                ha_ciga_pass_to_sale["# Ciga dependent successfully installed"].values[0] /
+                ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0]
+            )
+        else:
+            ha_ciga_pass_to_sale_rate = median_ciga_pass_to_install
+
+        ha_eco4_to_sale = eco4_ciga_independent_to_install_clipped[
+            eco4_ciga_independent_to_install_clipped["Ha Name"] == ha_name
+            ]
+        if not ha_eco4_to_sale.empty:
+            ha_eco4_to_sale_rate = (
+                ha_eco4_to_sale['# ECO4 successfully installed'].values[0] /
+                ha_eco4_to_sale['# ECO4 at install stage'].values[0]
+            )
+        else:
+            ha_eco4_to_sale_rate = median_eco4_to_install
+
+        eco4_post_ciga_total_results = calculate_eco4_post_ciga(
+            eligiblity_counts=eligiblity_counts,
+            input_data=input_data,
+            ha_ciga_conversion_rate=ha_ciga_conversion_rate,
+            ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
+            ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
+            eco4_rate=eco4_rate,
+            archetype_conversion_rate=archetype_conversion_rate
+        )
+
+        eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
+            eligiblity_counts=eligiblity_counts_remaining,
+            input_data=input_data,
+            ha_ciga_conversion_rate=ha_ciga_conversion_rate,
+            ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
+            ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
+            eco4_rate=eco4_rate,
+            archetype_conversion_rate=archetype_conversion_rate
+        )
+
+        # Calculate the delta compared to Warmfront's original remaining
+        if original_warmfront_remaining_eco4 == 0:
+            eco4_delta_vs_original_estimate_remaining = "N/A"
+        else:
+            eco4_delta_vs_original_estimate_remaining = ((eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] -
+                                                          original_warmfront_remaining_eco4) /
+                                                         original_warmfront_remaining_eco4)
+
+        # GBIS Figures
+        # Estimate the GBIS conversion rate
+        ha_gbis_sale_conversion = gbis_to_install_clipped[
+            gbis_to_install_clipped["Ha Name"] == ha_name
+            ]
+
+        if not ha_gbis_sale_conversion.empty:
+            ha_gbis_sale_conversion = (
+                ha_gbis_sale_conversion["# GBIS successfully installed"].values[0] /
+                ha_gbis_sale_conversion["# GBIS at install stage"].values[0]
+            )
+        else:
+            ha_gbis_sale_conversion = median_gbis_to_install
+
+        gbis_total_pre_cancellations = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "gbis"
+            ]["count"].sum()
+
+        gbis_total_pre_cancellations_revenue = gbis_total_pre_cancellations * gbis_rate
+        # gbis_total = int(np.round(gbis_total_pre_cancellations * ha_gbis_sale_conversion))
+        # gbis_total_revenue = int(gbis_total * gbis_rate)
+
+        gbis_remaining_pre_cancellations = eligiblity_counts_remaining[
+            eligiblity_counts_remaining["ECO Eligibility"] == "gbis"
+            ]["count"].sum()
+        gbis_remaining_pre_cancellations_revenue = (
+            gbis_remaining_pre_cancellations * gbis_rate
+        )
+        # This is the gbis jobs we expect to sell
+        gbis_remaining = int(np.round(gbis_remaining_pre_cancellations * ha_gbis_sale_conversion))
+        gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
+        # This is the number we expect to cancel
+        gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining)
+        gbis_remaining_expected_cancellations_revenue = gbis_remaining_expected_cancellations * gbis_rate
+
+        # GBIS delta
+        if original_warmfront_remaining_gbis == 0:
+            gbis_delta_vs_original_estimate_remaining = "N/A"
+        else:
+            gbis_delta_vs_original_estimate_remaining = (
+                (gbis_remaining - original_warmfront_remaining_gbis) / original_warmfront_remaining_gbis
+            )
+
+        # Current sales figures
+        # For any sales surveys that are complete, that could still cancel, we apply a conversion rate
+        eco4_actually_sold = 0
+        eco4_confirmed_cancellations = 0
+        eco4_expected_cancellations = 0
+
+        gbis_actually_sold = 0
+        gbis_confirmed_cancellations = 0
+        gbis_expected_cancellations = 0
+        if not survey_list.empty:
+            surveys_with_eligibility = survey_list.merge(
+                asset_list[["asset_list_row_id", "ECO Eligibility"]],
+                how="left", on="asset_list_row_id"
+            )
+            completed_eco4_sales = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "ECO4 - installed"
+                ].shape[0]
+            incomplete_eco4_sales = surveys_with_eligibility[
+                (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
+                (~surveys_with_eligibility["ECO Eligibility"].isin(
+                    ["eco4 - passed ciga"])
+                 )
+                ].shape[0]
+            incomplete_eco4_sales_ciga = surveys_with_eligibility[
+                (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
+                (surveys_with_eligibility["ECO Eligibility"].isin(
+                    ["eco4 - passed ciga"])
+                )
+                ].shape[0]
+
+            eco4_confirmed_cancellations = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "ECO4 - cancelled"
+                ].shape[0]
+
+            expected_eco4_sales_no_ciga = np.round(incomplete_eco4_sales * ha_eco4_to_sale_rate)
+            expected_eco4_sales_ciga = np.round(incomplete_eco4_sales_ciga * ha_ciga_pass_to_sale_rate)
+
+            eco4_expected_cancellations = (incomplete_eco4_sales + incomplete_eco4_sales_ciga) - (
+                expected_eco4_sales_no_ciga + expected_eco4_sales_ciga
+            )
+            eco4_expected_cancellations = int(np.round(eco4_expected_cancellations))
+
+            eco4_actually_sold = eco4_rate * (
+                completed_eco4_sales + expected_eco4_sales_no_ciga + expected_eco4_sales_ciga
+            )
+
+            completed_gbis_sales = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "GBIS - installed"
+                ].shape[0]
+            incomplete_gbis_sales = surveys_with_eligibility[
+                (surveys_with_eligibility["installation_status"] == "GBIS - in progress")
+            ].shape[0]
+
+            # Get confirmed cancellations
+            gbis_confirmed_cancellations = surveys_with_eligibility[
+                surveys_with_eligibility["installation_status"] == "GBIS - cancelled"
+                ].shape[0]
+
+            expected_gbis_unconfirmed_sales = np.round(incomplete_gbis_sales * ha_gbis_sale_conversion)
+
+            gbis_expected_cancellations = int(incomplete_gbis_sales - expected_gbis_unconfirmed_sales)
+
+            gbis_actually_sold = completed_gbis_sales * gbis_rate + (
+                expected_gbis_unconfirmed_sales * gbis_rate
+            )
+
+        # Add in the variance:
+        # We should expect that the pre-ciga total is:
+        # 1) The number of post CIGA successes +
+        # 2) The number of archetype failures +
+        # 3) The number of CIGA failures +
+        # 4) The number of cancellations
+        variance_total = eco4_pre_ciga - (
+            eco4_post_ciga_total_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_total_results["Estimated total - failed archetype check - #"] +
+            eco4_post_ciga_total_results['Estimated total - failed CIGA'] +
+            eco4_post_ciga_total_results["Expected cancellations - #"]
+        )
+        if variance_total != 0:
+            raise ValueError("Something went wrong in variance total")
+
+        variance_remaining = eco4_pre_ciga_remaining - (
+            eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] +
+            eco4_post_ciga_remaining_results["Estimated total - failed archetype check - #"] +
+            eco4_post_ciga_remaining_results['Estimated total - failed CIGA'] +
+            eco4_post_ciga_remaining_results["Expected cancellations - #"]
+        )
+
+        if variance_remaining != 0:
+            raise ValueError("Something went wrong in variance remaining")
+
+        # We also check variances to make sure that the pre-CIGA ECO4 total equals
+        # 1) Pre CIGA remaining +
+        # 2) ECO4 sold +
+        # 3) ECO4 confirmed cancellations +
+        # 4) ECO4 unconfirmed cancellations
+
+        pre_ciga_eco4_variance = (
+            eco4_pre_ciga_revenue -
+            eco4_pre_ciga_remaining_revenue -
+            eco4_actually_sold -
+            eco4_confirmed_cancellations * eco4_rate -
+            eco4_expected_cancellations * eco4_rate
+        )
+
+        if pre_ciga_eco4_variance != 0:
+            raise ValueError("Something went wrong in pre_ciga_eco4_variance")
+
+        # Check GBIS total variance
+        # The total before cancellations should equal:
+        # The number of sold +
+        # The number of confirmed cancelled +
+        # The number of expected cancelled +
+        # The number of remaining
+        gbis_variance = gbis_total_pre_cancellations - (
+            gbis_actually_sold / gbis_rate +
+            gbis_confirmed_cancellations +
+            gbis_expected_cancellations +
+            gbis_remaining_pre_cancellations
+        )
+
+        if gbis_variance != 0:
+            raise ValueError("Something went wrong in gbis_variance")
+
+        # We expect the remaining to equal expected sales + expected cancellations
+        gbis_variance_2 = gbis_remaining_pre_cancellations - (
+            gbis_remaining +
+            gbis_remaining_expected_cancellations
+        )
+
+        if gbis_variance_2 != 0:
+            raise ValueError("Something went wrong in gbis_variance2")
+
+        # Update the GBIS sold, since Warmfront often sold more GBIS than expected
+        original_warmfront_gbis_revenue = original_warmfront_sold_gbis + original_warmfront_remaining_gbis_revenue
+        original_warmfront_gbis = (
+            original_warmfront_sold_gbis / gbis_rate + original_warmfront_remaining_gbis_revenue / gbis_rate
+        )
+
+        to_append = {
+            ("", "", "", "HA Name"): ha_name,
+            # ECO4 - original warmfront figures
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
+            ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
+            ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue,
+            ("ECO4 original", "", "Sold or cancelled - £", ""): original_warmfront_sold_eco4,
+            ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+            # GBIS - original warmfront figures
+            ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
+            ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis,
+            ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue,
+            ("GBIS original", "", "Sold or cancelled - £", ""): original_warmfront_sold_gbis,
+            ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
+            # ECO4 - asset list, pre-ciga
+            ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
+            ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
+            ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+            ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
+            ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL", ""): pre_ciga_eco4_variance,
+            ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL VS ELIGIBLE & INELIGIBLE", ""): variance_total,
+            ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 REMAINING VS ELIGIBLE & INELIGIBLE", ""):
+                variance_remaining,
+            ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
+            ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate,
+            # This is for jobs that are in-progress and could still cancel
+            ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations * eco4_rate,
+            # ECO4 - asset list, post ciga, total
+            ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"):
+                eco4_post_ciga_total_results[
+                    "ECO4 - post CIGA - #"],
+            ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
+                "ECO4 - post CIGA - £"],
+            # ECO4 - asset list, post ciga, remaining
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
+                "ECO4 - post CIGA - #"],
+            ("ECO4 post-ciga", "", "Estimated remaining eligible - £", ""): eco4_post_ciga_remaining_results[
+                "ECO4 - post CIGA - £"],
+            ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %",
+             ""): eco4_delta_vs_original_estimate_remaining,
+            ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""):
+                eco4_post_ciga_remaining_results["Of which confirmed - #"],
+            ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - £", ""):
+                eco4_post_ciga_remaining_results["Of which confirmed - £"],
+            ("ECO4 post-ciga", "", "Of which forecast - #", ""):
+                eco4_post_ciga_remaining_results["Of which forecast - #"],
+            ("ECO4 post-ciga", "", "Of which forecast - £", ""):
+                eco4_post_ciga_remaining_results["Of which forecast - £"],
+            # Expected ECO4 cancellations
+            ("ECO4 Cancellations", "", "Of which expected cancellations - #", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - #"
+            ],
+            ("ECO4 Cancellations", "", "Of which expected cancellations - £", ""): eco4_post_ciga_remaining_results[
+                "Expected cancellations - £"
+            ],
+            # Archetype check failures
+            ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - #", ""):
+                eco4_post_ciga_remaining_results['Estimated total - failed archetype check - #'],
+            ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - £", ""):
+                eco4_post_ciga_remaining_results['Estimated total - failed archetype check - £'],
+            # CIGA failures
+            ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[
+                'Estimated total - failed CIGA'
+            ],
+            ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - £", ""): eco4_post_ciga_remaining_results[
+                'Estimated total - failed CIGA - £'
+            ],
+            ("ECO4 CIGA failures", "", "Confirmed failures - #", ""): eco4_post_ciga_remaining_results[
+                "Confirmed CIGA failures"
+            ],
+            ("ECO4 CIGA failures", "", "Confirmed failures - £", ""): eco4_post_ciga_remaining_results[
+                "Confirmed CIGA failures - £"
+            ],
+            ("ECO4 CIGA failures", "", "Estimated failures - #", ""): eco4_post_ciga_remaining_results[
+                "Estimated CIGA failures"
+            ],
+            ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[
+                "Estimated CIGA failures - £"
+            ],
+            # GBIS postcode list
+            ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total_pre_cancellations,
+            ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"):
+                gbis_total_pre_cancellations_revenue,
+            ("GBIS Postcode list", "Warmfront post code list", "GBIS VARIANCE", "GBIS total"): gbis_variance,
+            ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
+            ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate,
+            # This is for jobs that are in-progress and could still cancel
+            ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"):
+                gbis_remaining_pre_cancellations,
+            ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"):
+                gbis_remaining_pre_cancellations_revenue,
+            ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):
+                gbis_delta_vs_original_estimate_remaining,
+            # Expected cancellations
+            (
+                "GBIS Postcode list", "", "Of which expected sales - £ - £",
+                "GBIS total"): gbis_remaining_revenue,
+            ("GBIS Postcode list", "", "Of which expected cancellations -£", "GBIS total"):
+                gbis_remaining_expected_cancellations_revenue
+        }
+
+        # Make sure nothing is forgotten due to duplicate multi-index keys
+        if len(to_append) != 51:
+            raise ValueError("Something went wrong")
+
+        results.append(to_append)
+
+    results = pd.DataFrame(results)
+    results.to_csv("pipeline_remaining_raw.csv")
+
+    totals_row = {}
+    for col in results.columns:
+        if col == ('', '', '', 'HA Name'):
+            totals_row[col] = "Total"
+        elif col in [
+            ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", ""),
+            ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "")
+        ]:
+            totals_row[col] = None
+        else:
+            totals_row[col] = results[col].sum()
+
+    # For the delta columns, we calculate the delta on the totals
+    totals_row[("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", "")] = (
+        (
+            totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")] -
+            totals_row[("ECO4 original", "", "Remaining - #", "")]
+        ) / totals_row[("ECO4 original", "", "Remaining - #", "")]
+    )
+
+    totals_row[("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "")] = (
+        (
+            totals_row[("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total")] -
+            totals_row[("GBIS original", "", "Remaining - #", "")]
+        ) / totals_row[("GBIS original", "", "Remaining - #", "")]
+    )
+
+    blank_row = pd.DataFrame([{col: "" for col in results.columns}])
+
+    # Put together a Warmfront original remaining ECO4 vs asset list remaining ECO4 and same for GBIS, as well as totals
+
+    # ECO4 Headlines
+    headline_eco4_original_remaining = totals_row[("ECO4 original", "", "Remaining - #", "")]
+    headline_eco4_original_remaining_revenue = totals_row[("ECO4 original", "", "Remaining - £", "")]
+    headline_eco4_postcode_list_remaining = totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")]
+    headline_eco4_postcode_list_remaining_revenue = totals_row[
+        ("ECO4 post-ciga", "", "Estimated remaining eligible - £", "")
+    ]
+    headline_eco4_delta = 100 * (
+        (headline_eco4_postcode_list_remaining - headline_eco4_original_remaining) /
+        headline_eco4_original_remaining
+    )
+    headline_eco4_delta = round(headline_eco4_delta, 1)
+
+    # GBIS Headlines
+    headline_gbis_original_remaining = totals_row[("GBIS original", "", "Remaining - #", "")]
+    headline_gbis_original_remaining_revenue = totals_row[("GBIS original", "", "Remaining - £", "")]
+    headline_gbis_postcode_list_remaining = totals_row[
+        ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total")
+    ]
+    headline_gbis_postcode_list_remaining_revenue = totals_row[
+        ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total")
+    ]
+    headline_gbis_delta = 100 * (
+        (headline_gbis_postcode_list_remaining - headline_gbis_original_remaining) /
+        headline_gbis_original_remaining
+    )
+    headline_gbis_delta = round(headline_gbis_delta, 1)
+
+    headline_original_total_revenue_remaining = (
+        headline_eco4_original_remaining_revenue + headline_gbis_original_remaining_revenue
+    )
+
+    headline_postcode_list_total_revenue_remaining = (
+        headline_eco4_postcode_list_remaining_revenue + headline_gbis_postcode_list_remaining_revenue
+    )
+    headline_total_delta = 100 * (
+        (headline_postcode_list_total_revenue_remaining - headline_original_total_revenue_remaining) /
+        headline_original_total_revenue_remaining
+    )
+    headline_total_delta = round(headline_total_delta, 1)
+
+    headline_eco4_sold_since_november = (
+        totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] +
+        totals_row[('ECO4 pre-ciga', '', 'Confirmed cancellations - £', '')] +  # confirmed canclleations
+        totals_row[('ECO4 pre-ciga', '', 'Unconfirmed cancellations - £', '')] -  # expected cancellations
+        totals_row[('ECO4 original', '', 'Sold or cancelled - £', '')]
+    )
+
+    headline_gbis_sold_since_november = (
+        totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] +
+        totals_row[("GBIS Postcode list", "", "Confirmed cancellations - £", "")] +  # confirmed cancellations
+        totals_row[("GBIS Postcode list", "", "Unconfirmed cancellations - £", "")] -  # expected cancellations
+        totals_row[('GBIS original', '', 'Sold or cancelled - £', '')]
+    )
+
+    headlines = [
+        {
+            ("", "", "", "HA Name"): "Headlines",
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - November - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_original_remaining
+
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - November - £",
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_eco4_original_remaining_revenue
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Sold or cancelled since November - £",
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_eco4_sold_since_november
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_postcode_list_remaining
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - £",
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_eco4_postcode_list_remaining_revenue
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 £ remaining delta - %",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_eco4_delta) + "%"
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - November - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_original_remaining
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - November - £",
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_gbis_original_remaining_revenue
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Sold or cancelled since November - £",
+            (
+                "", "Original Warmfront estimate", "Total - #",
+                "ECO4 - November"): headline_gbis_sold_since_november
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - post code list - #",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_postcode_list_remaining
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS Remaining - post code list - £",
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_gbis_postcode_list_remaining_revenue
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS delta %",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_gbis_delta) + "%"
+        },
+        # Total revenue
+        {
+            ("", "", "", "HA Name"): "Total Remaining - November - £",
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_original_total_revenue_remaining
+        },
+        {
+            ("", "", "", "HA Name"): "Total Remaining - post code list (post CIGA) - £",
+            ("", "Original Warmfront estimate", "Total - #",
+             "ECO4 - November"): headline_postcode_list_total_revenue_remaining
+        },
+        {
+            ("", "", "", "HA Name"): "Total Remaining delta %",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_total_delta) + "%"
+        },
+    ]
+
+    assumptions = [
+        {
+            ("", "", "", "HA Name"): "Assumptions",
+        },
+        {
+            ("", "", "", "HA Name"): "ECO4 rate",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(eco4_rate)
+        },
+        {
+            ("", "", "", "HA Name"): "GBIS rate",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(gbis_rate)
+        },
+        {
+            ("", "", "", "HA Name"): "Median CIGA pass rate",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(median_ciga_success_rate * 100, 1)) + "%",
+        },
+        {
+            ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(maximum_ciga_conversion * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - #",
+             ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks We do not allow above this to be "
+                  "conservative"
+        },
+        {
+            ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(median_eco4_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - #",
+             ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Surveys that resulted "
+                  "in cancelled install are excluded."
+        },
+        {
+            ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
+            ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+                round(median_ciga_pass_to_install * 100, 1)) + "%",
+            ("ECO4 original", "", "Remaining - #",
+             ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Surveys that resulted in "
+                  "cancelled installs are excluded."
+        }
+    ]
+
+    results = pd.concat(
+        [
+            results,
+            pd.DataFrame([totals_row]),
+            blank_row,
+            pd.DataFrame(headlines),
+            blank_row,
+            blank_row,
+            pd.DataFrame(assumptions)
+        ]
+    )
+    with open("HA Remaining Analysis.csv", "w", newline="") as file:
+        # Write the DataFrame data without the index (adjust if you want the index).
+        results.to_csv(file, header=True, index=False)
+
+
+def fml_data_pull(loader):
+    has_bruh = [
+        "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
+        "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
+    ]
+
+    # Can't pull from EPC database because it's based in Scotland
+    # "HAXXX", "HAXX"
+    # DO
+    from backend.SearchEpc import SearchEpc
+    epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
+
+    failed_has = []
+    for ha in has_bruh:
+        print(f"Pulling data for {ha}")
+        try:
+            asset_list = loader.data[ha]["asset_list"].copy()
+            # properties found as eligible
+            fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+
+            # For each property, search for the latest EPC
+            epc_data = []
+            for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
+
+                property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+
+                if ha == "HAXXX":
+                    to_join = [str(x) for x in
+                               [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"],
+                                row["Postcode"]] if x is not None]
+                    full_address = ", ".join(to_join)
+                else:
+                    full_address = row["matching_address"]
+
+                searcher = SearchEpc(
+                    address1=str(row["HouseNo"]),
+                    postcode=row["matching_postcode"],
+                    auth_token=epc_api_key,
+                    os_api_key="",
+                    property_type=property_type,
+                    full_address=full_address,
+                    fast=True
+                )
+                # Force the skipping of estimating the EPC
+                searcher.ordnance_survey_client.property_type = None
+                searcher.ordnance_survey_client.built_form = None
+
+                searcher.find_property(skip_os=True)
+                if searcher.newest_epc is None:
+                    continue
+
+                epc = {
+                    "asset_list_row_id": row["asset_list_row_id"],
+                    **searcher.newest_epc.copy()
+                }
+
+                epc_data.append(epc)
+
+            # Remove None entries
+            epc_data = [x for x in epc_data if x is not None]
+            # Save the data in S3 as a pickle
+            epc_data_df = pd.DataFrame(epc_data)
+            save_pickle_to_s3(
+                data=epc_data_df,
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle"
+            )
+        except Exception as e:
+            failed_has.append(ha)
+
+
+def extract_lower_bound(age_band):
+    if pd.isna(age_band):
+        return 1930
+    try:
+        return int(age_band.split(':')[1].split('-')[0].strip())
+    except (ValueError, IndexError):
+        return 1930
+
+
+def classify_loft(x):
+    # high confidence
+    if float(x["roof_insulation_thickness"]) <= 100:
+        return "high"
+
+    if float(x["roof_insulation_thickness"]) <= 200:
+        return "medium"
+
+    if float(x["roof_insulation_thickness"]) <= 270 and x["epc_age"] >= 5 * 365:
+        return "medium"
+
+    return "unlikely"
+
+
+def fml_analysis(loader):
+    assumed_ciga_pass_rate = 0.731
+    has_bruh = [
+        "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
+        "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
+    ]
+
+    no_ciga_cavity_descriptions = [
+        "Cavity wall, as built, insulated (assumed)",
+        "Cavity wall, as built, no insulation (assumed)",
+        "Cavity wall, as built, partial insulation (assumed)",
+        "Cavity wall, no insulation (assumed)",
+        "Cavity wall, partial insulation (assumed)",
+        "Cavity wall,",
+        "Cavity wall, insulated (assumed)",
+        "Cavity wall, no insulation (assumed)",
+        "Cavity wall, as built, insulated (assumed)",
+        "Cavity wall, partial insulation (assumed)",
+    ]
+
+    # TODO: There will be some properties that are subject to CIGA that do not look like they need a CIGA check! Pass
+    #  them! Non-invasives will have checked the wall though
+
+    results = []
+    wall_descriptions = []
+    for ha_name in tqdm(has_bruh):
+
+        original_figures = loader.december_figures[
+            loader.december_figures["HA Name"] == ha_name
+            ].copy()
+        original_remaining = original_figures["ECO4 remaining"].values[0]
+        original_gbis_remaining = original_figures["GBIS remaining"].values[0]
+
+        # Read in the epc data
+        asset_list = loader.data[ha_name]["asset_list"].copy()
+        # properties found as eligible
+        fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+        epc_data = read_pickle_from_s3(
+            bucket_name="retrofit-datalake-dev",
+            s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
+        )
+        # We make sure we don't have duplicates. We do a super basic drop duplicates because it shouldn't be a huge
+        # issue at this point
+        epc_data = epc_data.drop_duplicates("uprn")
+        wall_descriptions.extend(epc_data["walls-description"].unique().tolist())
+
+        # time from the inspection to now
+        epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
+        if "estimated" not in epc_data.columns:
+            # For all after HA7, we don't use estimated surveys
+            epc_data["estimated"] = False
+
+        fuck_this = fml.merge(
+            epc_data, how="left", on="asset_list_row_id"
+        )
+        fuck_this["estimated"] = fuck_this["estimated"].fillna(True)
+        if fuck_this.shape[0] != fml.shape[0]:
+            raise Exception("What the fuck bruv")
+
+        # Take just remaining
+        if not loader.data[ha_name]["survey_list"].empty:
+            survey_list = (
+                loader.data[ha_name]["survey_list"][
+                    ~pd.isnull(loader.data[ha_name]["survey_list"]["asset_list_row_id"])
+                ]
+            )
+            fuck_this = fuck_this.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation status has gone to installation, and therefore is not remaining
+            fuck_this = fuck_this[pd.isnull(fuck_this["installation_status"])]
+            fuck_this = fuck_this.drop(columns=["installation_status"])
+
+        insulation_thicknesses = []
+        for _, x in fuck_this.iterrows():
+            if pd.isnull(x["roof-description"]):
+                continue
+            if x["roof-description"] == "SAP05:Roof":
+                continue
+
+            thickness = RoofAttributes(x["roof-description"]).process()["insulation_thickness"]
+            # If there is a + in the thickness, strip it out
+            thickness = str(thickness).replace("+", "")
+            insulation_thicknesses.append(
+                {'uprn': x["uprn"], "roof_insulation_thickness": thickness}
+            )
+        insulation_thicknesses = pd.DataFrame(insulation_thicknesses)
+
+        before_merge_shape = fuck_this.shape[0]
+        fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn")
+
+        if fuck_this.shape[0] != before_merge_shape:
+            raise Exception("SOMETHING WENT WRONG")
+
+        if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
+            # We perform the archetype test. If the property is a house, it needs to be detached, semi-detached
+            # or end terrace. If it's a bungalow, it must be detached
+            fuck_this["passes_archetype"] = None
+            fuck_this["passes_archetype"] = np.where(
+                (fuck_this["property-type"] == "House") &
+                (fuck_this["built-form"].isin(["Semi-Detached", "End-Terrace", "Detached"])),
+                True,
+                fuck_this["passes_archetype"]
+            )
+
+            fuck_this["passes_archetype"] = np.where(
+                (fuck_this["property-type"] == "Bungalow") &
+                (fuck_this["built-form"].isin(["Detached"])),
+                True,
+                fuck_this["passes_archetype"]
+            )
+
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") &
+                (fuck_this["passes_archetype"] == True),
+                "eco4 (subject to ciga)",
+                fuck_this["ECO Eligibility"]
+            )
+
+            # If it failed the archetype check and needs a CIGA check, it's not eligible
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") &
+                (fuck_this["passes_archetype"] != True),
+                "not eligible",
+                fuck_this["ECO Eligibility"]
+            )
+
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to archetype)") &
+                (fuck_this["passes_archetype"] == True),
+                "eco4",
+                fuck_this["ECO Eligibility"]
+            )
+
+            fuck_this["ECO Eligibility"] = np.where(
+                (fuck_this["ECO Eligibility"] == "eco4 (subject to archetype)") &
+                (fuck_this["passes_archetype"] != True),
+                "gbis",
+                fuck_this["ECO Eligibility"]
+            )
+
+            if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
+                raise Exception("DO THE DAMN ARCHETYPE CHECK BRO")
+
+        # clean roof insulation
+        fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("below average", "50")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("None", "0")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("none", "0")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("average", "150")
+        fuck_this["roof_insulation_thickness"] = fuck_this[
+            "roof_insulation_thickness"
+        ].str.replace("above 150", "150")
+
+        fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1)
+
+        had_survey = fuck_this[fuck_this["estimated"] == False]
+
+        # proportion with a survey:
+        proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0]
+
+        # Let's look just at the ECO4 business
+        # For things that had a survey, take the properties that didn't need a CIGA check
+        no_ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"] == "eco4"
+            ]
+
+        no_ciga_check_needed_eligible = no_ciga_check_needed[
+            (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
+            (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
+            ]
+
+        # For anything not needing a CIGA check, some of it will be GBIS
+        no_ciga_check_needed_eligible_gbis = no_ciga_check_needed[
+            (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) &
+            (~no_ciga_check_needed["asset_list_row_id"].isin(no_ciga_check_needed_eligible["asset_list_row_id"].values))
+            ]
+
+        # Characterise properties that have already passed their CIGA check
+        ciga_check_passed = had_survey[had_survey["ECO Eligibility"] == "eco4 - passed ciga"]
+        # These should be treated the same as one that have passed their ciga checks, from a detection perspective
+        ciga_check_passed_eligible = ciga_check_passed[
+            (ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (ciga_check_passed["roof_classiciation"].isin(["high", "medium"])) &
+            (ciga_check_passed["current-energy-efficiency"].astype(float) <= 80)
+            ]
+
+        if not loader.data[ha_name]["ciga_list"].empty:
+
+            proportions = loader.data[ha_name]["ciga_list"]["Guarantee"].value_counts(normalize=True)
+            ha_ciga_pass_rate = proportions[proportions.index == "No"].values[0]
+
+        else:
+            ha_ciga_pass_rate = assumed_ciga_pass_rate
+
+        # We take just the cavity walls
+        # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
+        # This paper is based on London properties
+        # The proportion of EPCs with building characteristics errors are shown to
+        # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
+        # compared with ~5% for wall insulation and glazing performance
+
+        ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"].str.contains("subject to ciga")
+        ].copy()
+
+        ciga_check_needed_eligible = ciga_check_needed[
+            (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
+            (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
+            ]
+
+        # Finally, characterise gbis properties. Some of the business might look like ECO4 work, whereas we then
+        # qualify what actually looks like gbis
+        gbis_identified = had_survey[
+            had_survey["ECO Eligibility"] == "gbis"
+            ].copy()
+
+        gbis_looks_like_eco4 = gbis_identified[
+            (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (gbis_identified["roof_classiciation"].isin(["high", "medium"])) &
+            (gbis_identified["current-energy-efficiency"].astype(float) <= 80) &
+            (
+                (
+                    (gbis_identified["property-type"] == "House") &
+                    (gbis_identified["built-form"] != "Mid-Terrace")
+                ) | (
+                    (gbis_identified["property-type"] == "Bungalow") &
+                    (gbis_identified["built-form"].isin(["Detached"]))
+                )
+            )
+            ]
+
+        gbis_qualified = gbis_identified[
+            (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (gbis_identified["current-energy-efficiency"].astype(float) <= 80) &
+            (~gbis_identified["asset_list_row_id"].isin(gbis_looks_like_eco4["asset_list_row_id"].values))
+            ]
+
+        ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate)
+        without_ciga_expectation = no_ciga_check_needed_eligible.shape[0]
+        passed_ciga_expectation = ciga_check_passed_eligible.shape[0]
+        identified_as_gbis_looks_like_eco4 = gbis_looks_like_eco4.shape[0]
+
+        # Need to add on the non-ciga
+        total_eco4_expectation = (
+            ciga_check_expectation +
+            without_ciga_expectation +
+            passed_ciga_expectation +
+            identified_as_gbis_looks_like_eco4
+        )
+
+        no_ciga_check_needed_actually_gbis = no_ciga_check_needed_eligible_gbis.shape[0]
+        gbis_qualified = gbis_qualified.shape[0]
+
+        total_gbis_expectation = no_ciga_check_needed_actually_gbis + gbis_qualified
+
+        if proportion_with_survey < 100:
+            # We estimate the rest
+            without_survey_needing_ciga = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True)
+                ]
+
+            if without_survey_needing_ciga.empty:
+                without_survey_without_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+
+                if ciga_check_needed.shape[0] == 0 and ciga_check_expectation == 0:
+                    without_survey_without_ciga_expected = without_survey_needing_ciga.shape[0]
+                else:
+                    without_survey_without_ciga_expected = np.round(
+                        without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
+                    )
+
+            without_survey_passed_ciga = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "eco4 - passed ciga")
+                ]
+
+            if without_survey_passed_ciga.empty:
+                without_survey_passed_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_passed_ciga_expected = np.round(
+                    without_survey_passed_ciga.shape[0] * (passed_ciga_expectation / ciga_check_passed.shape[0])
+                )
+
+            # Finally, no ciga needed
+            without_survey_eco4 = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "eco4")
+                ]
+
+            if without_survey_eco4.empty:
+                without_survey_eco4_expected = 0
+                without_survey_gbis_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_eco4_expected = np.round(
+                    without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0])
+                )
+
+                without_survey_gbis_expected = np.round(
+                    without_survey_eco4.shape[0] * (total_gbis_expectation / no_ciga_check_needed.shape[0])
+                )
+
+            # And gbis
+            without_survey_gbis = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "gbis")
+                ]
+
+            if without_survey_gbis.empty:
+                without_survey_identified_as_gbis_qualified = 0
+                without_survey_identified_as_gbis_eco4 = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_identified_as_gbis_qualified = np.round(
+                    without_survey_gbis.shape[0] * (gbis_qualified / gbis_identified.shape[0])
+                )
+
+                without_survey_identified_as_gbis_eco4 = np.round(
+                    without_survey_eco4.shape[0] * (identified_as_gbis_looks_like_eco4 / gbis_identified.shape[0])
+                )
+
+            total_eco4_expectation = (
+                total_eco4_expectation +
+                without_survey_without_ciga_expected +
+                without_survey_passed_ciga_expected +
+                without_survey_eco4_expected +
+                without_survey_identified_as_gbis_eco4
+            )
+
+            total_gbis_expectation = (
+                total_gbis_expectation +
+                without_survey_gbis_expected +
+                without_survey_identified_as_gbis_qualified
+            )
+
+        results.append(
+            {
+                "HA Name": ha_name,
+                "Original ECO4 Estimate - Remaining": original_remaining,
+                "Original GGBIS Estimate - Remaining": original_gbis_remaining,
+                # "Postcode List - Remaining": postcode_list_remaining,
+                # "Of which sold": sales_since_nov,
+                "EPC verified ECO4 Eligible - Remaining": int(total_eco4_expectation),
+                "EPC verified GBIS Eligibile - Remaining": int(total_gbis_expectation),
+            }
+        )
+
+    results_df = pd.DataFrame(results)
+    results_df.to_csv("analysis - revised.csv")
+
+    # results_df["Delta vs November"] = 100 * (
+    #     results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
+    # ) / results_df["Original ECO4 Estimate - Remaining"]
+
+    # TODO: Add in estimated GBIS (for eco jobs, of which look like gbis)
+    # TODO: Change the left hand side number for our post CIGA estimates
+
+
+def create_final_report():
+    """
+    This function will produce the final output for the HA analysis
+    :return:
+    """
+    epc_validated_results = pd.read_csv("analysis - revised.csv")
+    pipeline_results = pd.read_csv("pipeline_remaining_raw.csv")
+
+    ####################################
+    # Original Warmfront estimates
+    ####################################
+    # Create the volumes result
+    all_ha_summary_remaining = pipeline_results[
+        [
+            "('', '', '', 'HA Name')",
+            "('ECO4 original', '', 'Remaining - #', '')",
+            "('GBIS original', '', 'Remaining - #', '')",
+        ]
+    ].copy().rename(
+        columns={
+            "('', '', '', 'HA Name')": "HA Name",
+            "('ECO4 original', '', 'Remaining - #', '')": "# ECO4 remaining - All HA Summary",
+            "('GBIS original', '', 'Remaining - #', '')": "# GBIS remaining - All HA Summary",
+        }
+    )
+    all_ha_summary_remaining["# Total remaining - All HA Summary"] = (
+        all_ha_summary_remaining["# ECO4 remaining - All HA Summary"] +
+        all_ha_summary_remaining["# GBIS remaining - All HA Summary"]
+    )
+    all_ha_summary_remaining = all_ha_summary_remaining.sort_values("HA Name")
+
+    ####################################
+    # Postcode list - pre-CIGA
+    ####################################
+    postcode_list_pre_ciga_remaining = pipeline_results[
+        [
+            "('', '', '', 'HA Name')",
+            "('ECO4 pre-ciga', '', 'Remaining - #', '')",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')",
+        ]
+    ].copy().rename(
+        columns={
+            "('', '', '', 'HA Name')": "HA Name",
+            "('ECO4 pre-ciga', '', 'Remaining - #', '')": "# ECO4 remaining - Postcode list (pre CIGA)",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')": (
+                "# GBIS remaining - Postcode list (pre CIGA)"
+            ),
+        }
+    )
+
+    postcode_list_pre_ciga_remaining["# Total remaining - Postcode list (pre CIGA)"] = (
+        postcode_list_pre_ciga_remaining["# ECO4 remaining - Postcode list (pre CIGA)"] +
+        postcode_list_pre_ciga_remaining["# GBIS remaining - Postcode list (pre CIGA)"]
+    )
+    postcode_list_pre_ciga_remaining = postcode_list_pre_ciga_remaining.sort_values("HA Name")
+
+    ####################################
+    # Postcode list - post-CIGA
+    ####################################
+    postcode_list_post_ciga_remaining = pipeline_results[
+        [
+            "('', '', '', 'HA Name')",
+            "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')",
+        ]
+    ].copy().rename(
+        columns={
+            "('', '', '', 'HA Name')": "HA Name",
+            "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')":
+                "# ECO4 remaining - Postcode list (post CIGA)",
+            "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')": (
+                "# GBIS remaining - Postcode list (post CIGA)"
+            ),
+        }
+    )
+
+    postcode_list_post_ciga_remaining["# Total remaining - Postcode list (post CIGA)"] = (
+        postcode_list_post_ciga_remaining["# ECO4 remaining - Postcode list (post CIGA)"] +
+        postcode_list_post_ciga_remaining["# GBIS remaining - Postcode list (post CIGA)"]
+    )
+    postcode_list_post_ciga_remaining = postcode_list_post_ciga_remaining.sort_values("HA Name")
+
+    ####################################
+    # From EPC Database
+    ####################################
+    from_epc_database = epc_validated_results[
+        [
+            "HA Name",
+            "EPC verified ECO4 Eligible - Remaining",
+            "EPC verified GBIS Eligibile - Remaining"
+        ]
+    ].copy().rename(
+        columns={
+            "EPC verified ECO4 Eligible - Remaining": "# ECO4 remaining - From EPC Database (post CIGA)",
+            "EPC verified GBIS Eligibile - Remaining": "# GBIS remaining - From EPC Database (post CIGA)",
+        }
+    )
+
+    from_epc_database["# Total remaining - From EPC Database (post CIGA)"] = (
+        from_epc_database["# ECO4 remaining - From EPC Database (post CIGA)"] +
+        from_epc_database["# GBIS remaining - From EPC Database (post CIGA)"]
+    )
+    from_epc_database = from_epc_database.sort_values("HA Name")
+
+    # Combine the datasets
+    volumes = all_ha_summary_remaining.merge(
+        postcode_list_pre_ciga_remaining, how="left", on="HA Name"
+    ).merge(
+        postcode_list_post_ciga_remaining, how="left", on="HA Name"
+    ).merge(
+        from_epc_database, how="inner", on="HA Name"
+    )
+
+    revenue = volumes.copy()
+    # Convert the ECO4 volumes to revenue
+    for col in [
+        '# ECO4 remaining - All HA Summary',
+        '# ECO4 remaining - Postcode list (pre CIGA)',
+        '# ECO4 remaining - Postcode list (post CIGA)',
+        '# ECO4 remaining - From EPC Database (post CIGA)'
+    ]:
+        revenue[col] = revenue[col] * 1710
+
+    # Convert the GBIS volumes to revenue
+    for col in [
+        '# GBIS remaining - All HA Summary',
+        '# GBIS remaining - Postcode list (pre CIGA)',
+        '# GBIS remaining - Postcode list (post CIGA)',
+        '# GBIS remaining - From EPC Database (post CIGA)'
+    ]:
+        revenue[col] = revenue[col] * 600
+
+    # Re-calculate the totals
+    revenue['# Total remaining - All HA Summary'] = (
+        revenue['# ECO4 remaining - All HA Summary'] + revenue['# GBIS remaining - All HA Summary']
+    )
+
+    revenue['# Total remaining - Postcode list (pre CIGA)'] = (
+        revenue['# ECO4 remaining - Postcode list (pre CIGA)'] + revenue['# GBIS remaining - Postcode list (pre CIGA)']
+    )
+
+    revenue['# Total remaining - Postcode list (post CIGA)'] = (
+        revenue['# ECO4 remaining - Postcode list (post CIGA)'] + revenue[
+        '# GBIS remaining - Postcode list (post CIGA)']
+    )
+
+    revenue['# Total remaining - From EPC Database (post CIGA)'] = (
+        revenue['# ECO4 remaining - From EPC Database (post CIGA)'] +
+        revenue['# GBIS remaining - From EPC Database (post CIGA)']
+    )
+
+    # Replace the # with £ in the columns
+    revnue_colnames = [col.replace("#", "£") for col in revenue.columns]
+    revenue.columns = revnue_colnames
+
+    # We check that each column gets smaller
+    decreasing_check1 = all(
+        volumes["# ECO4 remaining - Postcode list (pre CIGA)"] >= volumes[
+            '# ECO4 remaining - Postcode list (post CIGA)']
+    )
+    if not decreasing_check1:
+        raise ValueError("decreasing_check1 failed")
+
+    # Just HA32 and HA17 should fail this, and it's due to GBIS jobs looking like ECO4
+    decreasing_check2 = volumes[volumes["# ECO4 remaining - From EPC Database (post CIGA)"] > volumes[
+        "# ECO4 remaining - Postcode list (post CIGA)"]]
+
+    if set(decreasing_check2["HA Name"].tolist()) != {"HA17", "HA32"}:
+        raise ValueError("decreasing_check2 failed")
+
+    # Check for GBIS
+    decreasing_check3 = all(
+        volumes["# GBIS remaining - Postcode list (pre CIGA)"] >= volumes[
+            '# GBIS remaining - Postcode list (post CIGA)']
+    )
+
+    if not decreasing_check3:
+        raise ValueError("decreasing_check3 failed")
+
+    # Don't perform this - this happens for multiple
+    # decreasing_check4 = volumes[volumes["# GBIS remaining - From EPC Database (post CIGA)"] > volumes[
+    #     "# GBIS remaining - Postcode list (post CIGA)"]]
+
+    # Store final outputs
+    volumes.to_csv("HA Analysis Final - volumes.csv")
+    revenue.to_csv("HA Analysis Final - revenue.csv")
+
+
+def app():
+    """
+    This app contains the housing association analysis for HAs 1, 6, 14, 39 and 107.
+    Only HA 6 has surveys
+    :return:
+    """
+
+    # Determines if we want to use the cached data in s3
+    use_cache = True
+    # Determines if we want to perform the data pull
+    pull_data = False
+    # Override to re-build all inputs
+    rebuild_inputs = False
+
+    # List all of the data in the folder
+    directories = [str(file) for entry in DATA_FOLDER.iterdir() if entry.is_dir()
+                   for file in entry.iterdir() if file.suffix == '.xlsx']
+    # Grab the December HA figures filepath
+    december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
+
+    # Add in:
+    priority_has = [
+        "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24",
+        "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54",
+        "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
+        # Added as of March 18th
+        "HA44", "HA45", "HA51", "HA52", "HA17", "HA5", "HA20",
+        # New HAs
+        "HAXX", "HAXXX",
+    ]
+    # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
+    # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
+    # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE]
     #
-    #     # Re-do
-    #     res = []
-    #     for _, row in tqdm(datasets["results_df"].iterrows(), total=datasets["results_df"].shape[0]):
-    #         epc = {
-    #             "walls-description": row["walls"],
-    #             "roof-description": row["roof"],
-    #             "floor-description": "",
-    #             "tenure": "",
-    #             "current-energy-efficiency": row["sap"],
-    #         }
-    #         eligibility = Eligibility(epc=epc, cleaned=cleaned)
-    #         eligibility.check_eco4_warmfront()
-    #         res.append(
-    #             {
-    #                 "row_id": row["row_id"],
-    #                 "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
-    #                 "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
-    #             }
-    #         )
+    # Consider for ECO4:
+    # HA 70 - have to merge ECO3 list though,
+    # HA17 has LOTs of assets, but the asset list is a mess
+    # HA53 but has EPCs done
+    # Consider for GBIS:
+    # Ignore for now:
+    # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
+    # Filter down the directories to only the priority HAs
+    directories = [d for d in directories if d.split("/")[2] in priority_has]
+
+    loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
+    loader.load()
+    loader.ha_facts_and_figures()
+    forecast_remaining_sales(loader)
+
+    # gbis rate
+    # breakdowns = []
+    # for ha, data_assets in loader.data.items():
+    #     asset_list = data_assets["asset_list"].copy()
+    #     breakdown = asset_list["ECO Eligibility"].value_counts().to_dict()
+    #     breakdowns.append(breakdown)
+    # breakdowns = pd.DataFrame(breakdowns)
     #
-    #     # Merge back on
-    #     res = pd.DataFrame(res)
-    #     datasets["results_df"] = datasets["results_df"].merge(res, how="left", on="row_id")
+    # installer = []
+    # for ha, data_assets in loader.data.items():
+    #     survey_list = data_assets["survey_list"]
+    #     if survey_list.empty:
+    #         continue
+    #     if "INSTALLER" not in survey_list.columns:
+    #         continue
     #
-    #     # Re-save in s3
-    #     save_pickle_to_s3(
-    #         data={
-    #             "results_df": datasets["results_df"],
-    #             "scoring_df": datasets["scoring_df"],
-    #             "nodata": datasets["nodata"]
-    #         },
-    #         bucket_name="retrofit-datalake-dev",
-    #         s3_file_name=f"ha-analysis/{ha_name}/processed_results.pickle"
-    #     )
+    #     installers = survey_list["INSTALLER"].value_counts().to_dict()
+    #     installers["ha_name"] = ha
+    #     installer.append(installers)
+    # installer = pd.DataFrame(installer)
+    # installer.drop(columns=["ha_name"]).sum().sum()
+
+    # Adhoc - for HA16, get the properties that still need a CIGA check
+    asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()
+    ha_16_need_ciga = asset_list_ha16[
+        asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga")
+    ]
+    completed_cigas = loader.data["HA16"]["ciga_list"].copy()
+    # Store the results
+    ha_16_need_ciga.to_csv("ha16_need_ciga.csv")
+    completed_cigas.to_csv("ha16_completed_cigas.csv")
+
+    # Adhoc - look at the current pipeline and identify how many dormant, CIGA dependent properties there are for
+    # live projects
+
+    # Read excel
+    orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx"
+    orderbook_workbook = openpyxl.load_workbook(orderbook_filepath)
+    orderbook_sheet = orderbook_workbook["Contractual Info"]
+    orderbook_colnames = [cell.value for cell in orderbook_sheet[1]]
+
+    rows = []
+    for row in orderbook_sheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        rows.append(row_data)
+
+    orderbook = pd.DataFrame(rows, columns=orderbook_colnames)
+    live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy()
+    live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "")
+
+    dormant_properties = []
+    missed_has = []
+    for _, customer in live_orderbook.iterrows():
+        if customer['Redacted HA'] not in loader.data.keys():
+            missed_has.append(customer['Redacted HA'])
+            continue
+        asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy()
+        survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy()
+        # Remove sold
+        if not survey_list.empty:
+            survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
+            asset_list = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
+            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+            asset_list = asset_list.drop(columns=["installation_status"])
+
+        # We pull out the properties that need a CIGA check
+        need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"]
+        need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"]
+        need_ciga_and_archetype = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)"
+            ]
+
+        dormant_properties.append(
+            {
+                "HA Name": customer['Redacted HA'],
+                "Need CIGA": need_ciga.shape[0],
+                "Need Archetype": need_archetype.shape[0],
+                "Need CIGA and Archetype": need_ciga_and_archetype.shape[0]
+            }
+        )
+
+    dormant_properties = pd.DataFrame(dormant_properties)
+    totals = dormant_properties.sum()
+    totals["HA Name"] = "Total"
+
+    dormant_properties = pd.concat([dormant_properties, totals.to_frame().T])
+    dormant_properties.to_csv("dormant_properties.csv")
+
+    loader.december_figures["ECO4 remaining"].sum()
+    december_figures = loader.december_figures.copy()
+    december_figures["ECO4 remaining"] = np.where(
+        december_figures["ECO4 remaining"] < 0,
+        0,
+        december_figures["ECO4 remaining"]
+    )
+    december_figures["ECO4 remaining"].sum()
diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index dac829e2..7040d66c 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
         common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
 
         self.df = self.df.loc[
-            :,
-            no_suffix_cols
-            + only_ending_cols
-            + [col for cols in common_cols for col in cols],
-        ]
+                  :,
+                  no_suffix_cols
+                  + only_ending_cols
+                  + [col for cols in common_cols for col in cols],
+                  ]
 
     def _remove_abnormal_change_in_floor_area(self):
         """
@@ -509,7 +509,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_sandstone_or_limestone"]
                     == expanded_df["is_sandstone_or_limestone_ending"]
                 )
-            ]
+                ]
         elif component == "floor":
             expanded_df = expanded_df[
                 (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@@ -526,7 +526,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_to_external_air"]
                     == expanded_df["is_to_external_air_ending"]
                 )
-            ]
+                ]
         elif component == "roof":
             expanded_df = expanded_df[
                 (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@@ -539,7 +539,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["has_dwelling_above"]
                     == expanded_df["has_dwelling_above_ending"]
                 )
-            ]
+                ]
 
         return expanded_df
 
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index c793716f..e74330a2 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -725,26 +725,26 @@ class EPCRecord:
         if self.prepared_epc["construction-age-band"] in DATA_ANOMALY_MATCHES:
             if self.old_data:
                 # Take the most recent
-                max_datetime = max(
-                    [
-                        old_record["lodgement-datetime"]
-                        for old_record in self.old_data
-                        if old_record["construction-age-band"]
-                           not in DATA_ANOMALY_MATCHES
-                    ]
-                )
-
-                most_recent = [
-                    old_record
+                old_age_bands = [
+                    old_record["lodgement-datetime"]
                     for old_record in self.old_data
-                    if old_record["lodgement-datetime"] == max_datetime
+                    if old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES
                 ]
 
-                self.prepared_epc["construction-age-band"] = (
-                    EPCDataProcessor.clean_construction_age_band(
-                        most_recent[0]["construction-age-band"]
+                if old_age_bands:
+                    max_datetime = max(old_age_bands)
+
+                    most_recent = [
+                        old_record
+                        for old_record in self.old_data
+                        if old_record["lodgement-datetime"] == max_datetime
+                    ]
+
+                    self.prepared_epc["construction-age-band"] = (
+                        EPCDataProcessor.clean_construction_age_band(
+                            most_recent[0]["construction-age-band"]
+                        )
                     )
-                )
 
         self.construction_age_band = self.prepared_epc["construction-age-band"]
         self.age_band = england_wales_age_band_lookup.get(self.construction_age_band)
diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py
index 53c1a329..3f1a1a80 100644
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@@ -36,8 +36,11 @@ def app():
     cleaned_data = {}
     epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
 
+    WALLS = []
     for directory in tqdm(epc_directories):
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        z = data["WALLS_DESCRIPTION"].unique().tolist()
+        WALLS.extend(z)
         # Rename the columns to the same format as the api returns
         data.columns = [c.replace("_", "-").lower() for c in data.columns]
         # Take just date before the date threshold
diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py
index 9d3b46b4..76f99f09 100644
--- a/etl/epc_clean/epc_attributes/RoofAttributes.py
+++ b/etl/epc_clean/epc_attributes/RoofAttributes.py
@@ -122,6 +122,13 @@ class RoofAttributes(Definitions):
         result["is_valid"] = "invalid" not in description
         description = description.replace("invalid", "")
 
+        # We handle an edge case where the description is "pitched, 150  loft insulation" and is missing the mm
+        if result["is_pitched"] or result["is_loft"]:
+            # Match a number (optionally followed by "+") directly before "insulation", e.g. "150   insulation"
+            match = re.search(r"(\d+\+?)\s*insulation", description)
+            if match:
+                result['insulation_thickness'] = match.group(1)
+
         # insulation thickness
         thickness_map = {
             "ceiling insulated": "average",
@@ -137,11 +144,11 @@ class RoofAttributes(Definitions):
                 # Remove the match from the description
                 # description = description.replace(key, "")
                 break
-        else:
-            # Extract insulation thickness in mm, if present
-            match = re.search(r'(\d+\+?)\s*mm', description)
-            if match:
-                result['insulation_thickness'] = match.group(1)
+
+        # Extract insulation thickness in mm, if present
+        match = re.search(r'(\d+\+?)\s*mm', description)
+        if match:
+            result['insulation_thickness'] = match.group(1)
 
         if "insulation_thickness" not in result:
             result['insulation_thickness'] = None
diff --git a/utils/s3.py b/utils/s3.py
index cb55094a..8d36bdb3 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -184,7 +184,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
         logger.errpr("Incomplete credentials provided.")
         return None
     except Exception as e:
-        logger.errpr(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
+        logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
         return None
 
     # Deserialize data from pickle format