Merge pull request #444 from Hestia-Homes/remote-assessment-api

Remote assessment api
2026-07-27 23:35:01 +00:00 · 2025-06-22 15:37:56 +01:00 · 2025-06-22 15:37:56 +01:00 · b81e2a4eba
commit b81e2a4eba
parent 7992218f0b 127773a19d
24 changed files with 2567 additions and 451 deletions
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@ -7,7 +7,7 @@
      <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
    </content>
-    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
 </module>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -3,7 +3,7 @@
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
  <component name="PyCharmProfessionalAdvertiser">
    <option name="shown" value="true" />
  </component>
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -2,8 +2,6 @@ import os
 import json
 import pandas as pd
 from pprint import pprint
-import msgpack
-from utils.s3 import read_from_s3
 from asset_list.AssetList import AssetList
 from asset_list.mappings.property_type import PROPERTY_MAPPING
 from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
@ -62,98 +60,227 @@ def app():
    Property UPRN
    """

-    # Thurrock
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
-    data_filename = "THURROCK COUNCIL - For analysis.xlsx"
-    sheet_name = "Assets"
+    # NCHA
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
+    data_filename = "Energy Information MASTER June 2025.xlsx"
+    sheet_name = "Data"
    postcode_column = 'Postcode'
-    fulladdress_column = "Full Address"
+    fulladdress_column = "Address"
    address1_column = None
    address1_method = "house_number_extraction"
    address_cols_to_concat = []
    missing_postcodes_method = None
-    landlord_year_built = "Construction Date"
+    landlord_year_built = "Build Date (HAR10)"
    landlord_os_uprn = None
-    landlord_property_type = "Property Type"
-    landlord_built_form = "Property Subtype"
-    landlord_wall_construction = None
+    landlord_property_type = "Property Type (HAR10)"
+    landlord_built_form = "Build Form (EPC)"
+    landlord_wall_construction = "Wall Description"
    landlord_roof_construction = None
-    landlord_heating_system = "Main Heating Type"
+    landlord_heating_system = "HEAT Code"
    landlord_existing_pv = None
-    landlord_property_id = "Property Reference"
-    landlord_sap = None
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
+    landlord_property_id = "Place ref"
+    landlord_sap = "EPC SAP"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    outcomes_address = None
    master_filepaths = []
    master_to_asset_list_filepath = None
    phase = False
    ecosurv_landlords = None
+    asset_list_header = 0
+    landlord_block_reference = None
+    master_id_colnames = []

-    # Medway
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
-    data_filename = "MEDWAY Asset List.xlsx"
-    sheet_name = "Asset list"
-    postcode_column = 'Postcode'
-    fulladdress_column = None
-    address1_column = "House Number"
-    address1_method = None
-    address_cols_to_concat = ["House Number", "Street 1"]
-    missing_postcodes_method = None
-    landlord_year_built = "Year Built"
-    landlord_os_uprn = None
-    landlord_property_type = "Property Type - Academy"
-    landlord_built_form = "Property Type - Academy"
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "Row ID"
-    landlord_sap = None
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
-    master_filepaths = []
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
+    # data_filename = "07.04 CALICO - Final List.xlsx"
+    # asset_list_header = 2
+    # sheet_name = "Final List"
+    # postcode_column = 'Postcode'
+    # fulladdress_column = None
+    # address1_column = "Property Number / Name"
+    # address1_method = None
+    # address_cols_to_concat = [
+    #     "Property Number / Name",
+    #     "Street",
+    #     "Town"
+    # ]
+    # missing_postcodes_method = None
+    # landlord_year_built = "NROSH Estimated Build Date"
+    # landlord_os_uprn = None
+    # landlord_property_type = "Asset Type"
+    # landlord_built_form = None
+    # landlord_wall_construction = "Wall Type"
+    # landlord_heating_system = "Boiler Type"
+    # landlord_existing_pv = None
+    # landlord_property_id = "Asset Reference"
+    # outcomes_filename = []
+    # outcomes_sheetname = []
+    # outcomes_postcode = []
+    # outcomes_houseno = []
+    # outcomes_id = []
+    # outcomes_address = []
+    # master_filepaths = []
+    # master_id_colnames = []
+    # master_to_asset_list_filepath = None
+    # landlord_roof_construction = None
+    # landlord_block_reference = None
+    # landlord_sap = "Current Efficiency Rating - Score"
+    # phase = None
+    # ecosurv_landlords = None

-    # MHS
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
-    data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
-    sheet_name = "Sheet1"
-    postcode_column = 'Postcode'
-    fulladdress_column = "FullAddress"
-    address1_column = None
-    address1_method = "house_number_extraction"
-    address_cols_to_concat = []
-    missing_postcodes_method = None
-    landlord_year_built = "BuiltInYear"
-    landlord_os_uprn = None
-    landlord_property_type = "AssetType"
-    landlord_built_form = "PropertyType"
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "UPRN"
-    landlord_sap = None
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
-    master_filepaths = []
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
+    # data_folder = (
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset
+    #     List"
+    # )
+    # data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx"
+    # sheet_name = "Assets"
+    # postcode_column = 'Postcode'
+    # fulladdress_column = "Address"
+    # address1_column = None
+    # address1_method = "house_number_extraction"
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "Build Year"
+    # landlord_os_uprn = None
+    # landlord_property_type = "Property Archetype"
+    # landlord_built_form = None
+    # landlord_wall_construction = None
+    # landlord_heating_system = "Heating Fuel Type"
+    # landlord_existing_pv = None
+    # landlord_property_id = "Uprn - DO NOT DELETE"
+    # outcomes_filename = [
+    #     os.path.join(data_folder, "RT - LiveWest.xlsx")
+    # ]
+    # outcomes_sheetname = ["Feedback"]
+    # outcomes_postcode = ["Poscode"]
+    # outcomes_houseno = ["No."]
+    # outcomes_id = ["UPRN"]
+    # outcomes_address = ["Address"]
+    # master_filepaths = [
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
+    #     Master "
+    #     "- redacted for analysis/CAVITY-Table 1.csv"
+    # ]
+    # master_id_colnames = [None]
+    # master_to_asset_list_filepath = None
+    # landlord_roof_construction = None
+    # landlord_block_reference = None
+    # landlord_sap = None
+    # phase = None
+    # ecosurv_landlords = "livewest|live west"
+
+    # data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
+    #                "2025/Livewest Asset List (Original) - csv")
+    # data_filename = "Report-Table 1.csv"
+    # sheet_name = None
+    # postcode_column = 'Postcode'
+    # fulladdress_column = "T1_Address"
+    # address1_column = None
+    # address1_method = "house_number_extraction"
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "Build Yr"
+    # landlord_os_uprn = None
+    # landlord_property_type = "T1_AssetType"
+    # landlord_built_form = "T1_AssetType"
+    # landlord_wall_construction = "Wall Type Cavity"
+    # landlord_heating_system = "Heating Fuel"
+    # landlord_existing_pv = None
+    # landlord_property_id = "T1_UPRN"
+    # outcomes_filename = [
+    #     os.path.join(data_folder, "RT - LiveWest.xlsx")
+    # ]
+    # outcomes_address = ["Address"]
+    # outcomes_sheetname = ["Feedback"]
+    # outcomes_postcode = ["Poscode"]
+    # outcomes_houseno = ["No."]
+    # outcomes_id = ["UPRN"]
+    # master_filepaths = [
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
+    #     Master "
+    #     "- redacted for analysis/CAVITY-Table 1.csv"
+    # ]
+    # master_id_colnames = [None]
+    # master_to_asset_list_filepath = None
+    # landlord_roof_construction = None
+    # landlord_block_reference = None
+    # landlord_sap = None
+    # phase = None
+    # ecosurv_landlords = "livewest|live west"
+
+    # Stori
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru"
+    # data_filename = "Asset list - for analysis.xlsx"
+    # sheet_name = "SAP and Costs Calculations"
+    # postcode_column = 'Postcode'
+    # fulladdress_column = "Address1"
+    # address1_column = None
+    # address1_method = "house_number_extraction"
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "Age"
+    # landlord_os_uprn = None
+    # landlord_property_type = "TYPE"
+    # landlord_built_form = "AGE / DETACHMENT"
+    # landlord_wall_construction = "WALL"
+    # landlord_roof_construction = "LOFT INSULATION"
+    # landlord_heating_system = "BOILER"
+    # landlord_existing_pv = "SOLAR PV"
+    # landlord_property_id = "UPRN"
+    # landlord_sap = "Current SAP Rating"
+    # landlord_block_reference = None
+    # outcomes_filename = []
+    # outcomes_sheetname = []
+    # outcomes_postcode = []
+    # outcomes_houseno = []
+    # outcomes_id = []
+    # outcomes_address = []
+    # master_filepaths = []
+    # master_to_asset_list_filepath = None
+    # master_id_colnames = []
+    # phase = False
+    # ecosurv_landlords = None
+
+    # Thrive - reconciliation
+    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
+    # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
+    # sheet_name = "Sheet1"
+    # postcode_column = 'postcode'
+    # fulladdress_column = "full_address"
+    # address1_column = "address_line_1"
+    # address1_method = None
+    # address_cols_to_concat = []
+    # missing_postcodes_method = None
+    # landlord_year_built = "age_band_calculated"
+    # landlord_os_uprn = None
+    # landlord_property_type = "property_type"
+    # landlord_built_form = "build_form"
+    # landlord_wall_construction = None
+    # landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
+    # landlord_heating_system = "heating_type_updated"
+    # landlord_existing_pv = None
+    # landlord_property_id = "thrive_property_id"
+    # landlord_sap = "sap_rating_updated"
+    # landlord_block_reference = "block_reference"
+    # outcomes_filename = [
+    #     os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
+    # ]
+    # outcomes_sheetname = ["Sheet1"]
+    # outcomes_postcode = ["postcode"]
+    # outcomes_houseno = ["No."]
+    # outcomes_id = ["thrive_property_id"]
+    # outcomes_address = ["address"]
+    # master_filepaths = [
+    #     os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
+    #     os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
+    # ]
+    # master_to_asset_list_filepath = None
+    # master_id_colnames = ["thrive_property_id", "thrive_property_id"]
+    # phase = False
+    # ecosurv_landlords = "thrive"

    # Southern Midlands
    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
@ -182,40 +309,12 @@ def app():
    # master_filepaths = []
    # master_to_asset_list_filepath = None

-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
-    data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
-    sheet_name = "CHECKED"
-    postcode_column = 'Postcode'
-    fulladdress_column = None
-    address1_column = "AddressLine1"
-    address1_method = None
-    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
-    missing_postcodes_method = None
-    landlord_year_built = None
-    landlord_os_uprn = None
-    landlord_property_type = "Archetype (PFP)"
-    landlord_built_form = "Archetype (PFP)"
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "Uprn"
-    outcomes_filename = None
-    outcomes_sheetname = None
-    outcomes_postcode = None
-    outcomes_houseno = None
-    outcomes_id = None
-    master_filepaths = []
-    master_to_asset_list_filepath = None
-    landlord_sap = None
-    phase = None
-
    # Maps addresses to uprn in problematic cases
    manual_uprn_map = {}

    asset_list = AssetList(
        local_filepath=os.path.join(data_folder, data_filename),
-        header=0,
+        header=asset_list_header,
        sheet_name=sheet_name,
        address1_colname=address1_column,
        postcode_colname=postcode_column,
@ -233,6 +332,7 @@ def app():
        landlord_heating_system=landlord_heating_system,
        landlord_existing_pv=landlord_existing_pv,
        landlord_sap=landlord_sap,
+        landlord_block_reference=landlord_block_reference,
        phase=phase
    )
    asset_list.init_standardise()
@ -294,7 +394,8 @@ def app():

    asset_list.flag_survey_master(
        master_filepaths=master_filepaths,
-        master_to_asset_list_filepath=master_to_asset_list_filepath
+        master_to_asset_list_filepath=master_to_asset_list_filepath,
+        master_id_colnames=master_id_colnames,
    )

    asset_list.flag_ecosurv(ecosurv_landlords)
@ -306,7 +407,7 @@ def app():
    epc_api_only = False
    force_retrieve_data = False
    skip = None  # Used to skip already completed chunks
-    chunk_size = 5000
+    chunk_size = 2000
    filename = "Chunk {i}.csv"
    download_folder = os.path.join(data_folder, "Chunks")
    if not os.path.exists(download_folder):
@ -486,59 +587,12 @@ def app():
    )

    asset_list.merge_data(epc_df)
-
    asset_list.extract_attributes()
+    asset_list.identify_worktypes()

-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
-    )
-    cleaned = msgpack.unpackb(cleaned, raw=False)
-
-    asset_list.identify_worktypes(cleaned)
-
-    pprint(asset_list.work_type_figures)
-
-    asset_list.flat_analysis()
-
-    asset_list.load_contact_details(
-        local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
-        sheet_name="Report 1",
-        landlord_property_id=asset_list.landlord_property_id,
-        phone_number_column='Property Current Tel. Number',
-        fullname_column='Proeprty Current Occupant',
-        firstname_column=None,
-        lastname_column=None,
-        email_column=None,  # TODO - we need this
-    )
-
-    # Convert to a format suitable for CRM
-    # TODO: TEMP
-    assigned_surveyors = pd.DataFrame(
-        [
-            {
-                asset_list.landlord_property_id: "02610001",
-                "week_commencing": "10/10/2025",
-                "surveyor_name": "Khalim Conn-Kowlessar",
-                "surveyor_email": "khalim@domna.homes",
-            }
-        ]
-    )
-
-    # TODO: Sort the output by postcode
-
-    company_domain = "ealing.gov.uk"
-    crm_pipeline_name = "Survey Management"
-    first_dealstage = "READY TO BEGIN SCHEDULING"
-    # TODO - temp, upload to either SharePoint or AWS
-
-    asset_list.prepare_for_crm(
-        assigned_surveyors=assigned_surveyors,
-        company_domain=company_domain,
-        crm_pipeline_name=crm_pipeline_name,
-        first_dealstage=first_dealstage
-    )
-    hubspot_data = asset_list.hubspot_data
+    # We now flag the status of the property
+    asset_list.label_property_status()
+    asset_list.analyse_geographies()

    # Store as an excel
    filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
@ -546,7 +600,8 @@ def app():

    with pd.ExcelWriter(filename) as writer:
        asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
-        asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
+        if asset_list.block_analysis_df is not None:
+            asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
        # If we have outcomes, we add a tab with the outcomes
        if not asset_list.outcomes_for_output.empty:
            asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
@ -560,5 +615,5 @@ def app():
        if not asset_list.ecosurv_no_match.empty:
            asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)

-    # Store the Hubspot export as a csv
-    hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)
+        if not asset_list.geographical_areas.empty:
+            asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
--- a/asset_list/hubspot/config.py
+++ b/asset_list/hubspot/config.py
@ -0,0 +1,85 @@
+from enum import IntEnum, Enum
+
+CRM_PIPELINE_NAME = 'Operations - Housing Associations'
+
+
+class HubspotProcessStatus(IntEnum):
+    def __new__(cls, value, label):
+        obj = int.__new__(cls, value)
+        obj._value_ = value
+        obj.label = label
+        return obj
+
+    # The numeric values have no meaning of their own; they only define the order of the stages
+
+    # This is the first stage, where a survey is ready to go
+    READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED"
+    # The property didn't get access and needs sign off
+    SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"
+    # The survey has been completed. We don't have any update as to whether the property has been installed
+    SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"
+    # The property turned out to be ineligible
+    NOT_VIABLE = 4, "NOT VIABLE"
+    # The property is with the installer. This will likely be the default for historic programmes
+    SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER"
+    # The property has been installed
+    INSTALL_COMPLETE = 6, "INSTALL COMPLETE"
+    # The install has completed and lodgement is complete
+    LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE"
+    # The property has been cancelled
+    INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"
+
+
+class Installer(Enum):
+    SCIS = "SCIS"
+    JJ_CRUMP = "J & J CRUMP"
+    SGEC = "SGEC"
+
+    @classmethod
+    def is_valid_value(cls, value):
+        """
+        Return True if ``value`` is the value of one of the Installer members (e.g. "J & J CRUMP"), else False.
+        """
+        return value in cls._value2member_map_
+
+
+CRM_UPLOAD_COLUMNS = [
+    'Name <LISTING hs_name>', 'Associations: Listing', 'Company Domain Name <COMPANY domain>',
+    'Email <CONTACT email>', 'First Name <CONTACT firstname>', 'Last Name <CONTACT lastname>',
+    'Phone <CONTACT phone>', 'Secondary Phone <CONTACT secondary_phone_number>',
+    'Secondary Contact Full Name <CONTACT secondary_contact_full_name>',
+    'Listing Owner Email <LISTING hubspot_owner_id>',
+    'Full Address <LISTING full_address>', 'Address 1 <LISTING hs_address_1>',
+    'Address 2 <LISTING hs_address_2>', 'Postcode <LISTING hs_zip>',
+    'Property Type <LISTING property_type>', 'Property Sub Type <LISTING property_sub_type>',
+    'Bedroom(s) <LISTING hs_bedrooms>', 'Domna Property ID <LISTING domna_property_id>',
+    'National UPRN <LISTING national_uprn>', 'Owner Property ID <LISTING owner_property_id>',
+    'Wall Construction <LISTING wall_construction>', 'Heating System <LISTING heating_system>',
+    'Year Built <LISTING hs_year_built>', 'Boiler Make <LISTING boiler_make>',
+    'Boiler Model <LISTING boiler_model>',
+    'Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>',
+    'Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>',
+    'Non-intrusives: Insulation <LISTING non_intrusives__insulation>',
+    'Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>',
+    'Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>',
+    'Non-Intrusives: PV Access Issues <LISTING non_intrusives__access_issues>',
+    'Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>',
+    'Non-Intrusives: Surveyor Notes <LISTING non_intrusives__surveyor_notes>',
+    'Non-Intrusives: Surveyor Name <LISTING non_intrusives__surveyor_name>',
+    'CIGA: Date Requested <LISTING ciga__date_requested>',
+    'CIGA: Cavity Guarantee Found <LISTING ciga__cavity_guarantee_found>',
+    'Last EPC: Is Estimated <LISTING last_epc__is_estimated>',
+    'Last EPC: EPC Rating <LISTING last_epc__epc_rating>',
+    'Last EPC: SAP Rating <LISTING last_epc__sap_rating>',
+    'Last EPC: Main Heating Description <LISTING last_epc__main_heating_description>',
+    'Last EPC: Heating Controls <LISTING last_epc__heating_controls>',
+    'Last EPC: Lodgement Date <LISTING last_epc__lodgement_date>',
+    'Last EPC: Floor Area <LISTING last_epc__floor_area>', 'Last EPC: Wall <LISTING last_epc__wall>',
+    'Last EPC: Roof <LISTING last_epc__roof>', 'Last EPC: Floor <LISTING last_epc__floor>',
+    'Last EPC: Room Height <LISTING last_epc__room_height>',
+    'Last EPC: Age Band <LISTING last_epc__age_band>', 'Deal Stage <DEAL dealstage>',
+    'Pipeline <DEAL pipeline>', 'Expected Commencement Date <DEAL expected_commencement_date>',
+    'Deal Name <DEAL dealname>', 'Project Code <DEAL project_code>',
+    'Product ID <LINE_ITEM hs_product_id>', 'Name <LINE_ITEM name>', 'Unit price <LINE_ITEM price>',
+    'Quantity <LINE_ITEM quantity>', 'Deal Owner', 'Amount <DEAL amount>', 'Installer <DEAL installer>'
+]
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@ -0,0 +1,91 @@
+import os
+import pandas as pd
+from asset_list.AssetList import AssetList
+
+
+def app():
+    """Build a HubSpot upload CSV from a standardised asset list, using the hard-coded inputs below.
+
+    TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after
+          review. So, we will need to update the hubspot status for these entries and set them to None, if they
+          were previously being set to ready for scheduling. We don't want to just filter on rows where
+          cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove
+          them
+
+    TODO: If we wish to upload deals in batches
+
+    :raises ValueError: if any row of the upload is missing a project code or a deal stage
+    """
+
+    # inputs:
+    reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
+    customer_domain = "https://sandwell.gov.uk"  # NOTE(review): has a URL scheme - confirm prepare_for_crm expects one
+    installer_name = "J & J CRUMP"
+    asset_list_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
+        "Standardised.xlsx"
+    )
+    asset_list_sheet_name = "Proposed Program"
+    asset_list_header = 1
+
+    contact_details_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
+    )
+    contacts_sheet_name = "Sheet1"
+    contacts_landlord_property_id = "landlord_property_id"
+    contacts_phone_number_column = "phone_number"
+    contacts_secondary_phone_number_column = "secondary_phone_number"
+    contacts_secondary_contact_full_name = "secondary_contact_full_name"
+    contacts_email_column = "email"
+    contacts_fullname_column = "fullname"
+    contacts_firstname_column = "firstname"
+    contacts_lastname_column = "lastname"
+
+    existing_programme_filepath = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv"
+    )
+
+    asset_list = AssetList.load_standardised_asset_list(
+        asset_list_filepath, asset_list_sheet_name, asset_list_header
+    )
+    asset_list.load_contact_details(
+        local_filepath=contact_details_filepath,
+        sheet_name=contacts_sheet_name,
+        landlord_property_id=contacts_landlord_property_id,
+        phone_number_column=contacts_phone_number_column,
+        secondary_phone_number_column=contacts_secondary_phone_number_column,
+        secondary_contact_full_name=contacts_secondary_contact_full_name,
+        email_column=contacts_email_column,
+        fullname_column=contacts_fullname_column,
+        firstname_column=contacts_firstname_column,
+        lastname_column=contacts_lastname_column
+    )
+
+    asset_list.prepare_for_crm(
+        company_domain=customer_domain,
+        installer_name=installer_name,
+        reconcile_programme=reconcile_programme
+    )
+
+    # Remove the existing programme
+    existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
+    asset_list.hubspot_data = asset_list.hubspot_data[
+        ~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
+            existing_programme['Domna Property ID'].values
+        )
+    ]
+
+    # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
+    directory, filename = os.path.split(asset_list_filepath)
+    name, _ext = os.path.splitext(filename)
+    output_filename = f"{name} - Hubspot Upload.csv"
+    output_filepath = os.path.join(directory, output_filename)
+
+    if pd.isnull(asset_list.hubspot_data['Project Code <DEAL project_code>']).any():
+        raise ValueError("Some rows have missing project codes. These cannot be uploaded to HubSpot.")
+
+    if pd.isnull(asset_list.hubspot_data['Deal Stage <DEAL dealstage>']).any():
+        raise ValueError("Some rows have missing deal stages. These cannot be uploaded to HubSpot.")
+
+    # Just store locally
+    asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@ -331,4 +331,33 @@ BUILT_FORM_MAPPINGS = {
    'Low Rise': 'low rise',
    'Upper Floor': 'top-floor',
    'High Rise': 'high rise',
+
+    '2012 ONWARDS DETACHED': 'detached',
+    '1950-66 END TERRACE': 'end-terrace',
+    '1976-82 MID TERRACED': 'mid-terrace',
+    '1950-66 MID TERRACE': 'mid-terrace',
+    '1991-95  DETACHED': 'detached',
+    '1976-82 END TERRACED': 'end-terrace',
+    '1967-75 DETACHED': 'detached',
+    'PRE 1900 DETACHED': 'detached',
+    'PRE 1900 MID TERRACE': 'mid-terrace',
+    '1900 DET': 'detached',
+    '1967-75 MID TERR': 'mid-terrace',
+    '1930-49 SEMI DET': 'semi-detached',
+    '1900-29 SEMI DET': 'semi-detached',
+    '1900-29 MID TERR': 'mid-terrace',
+    '1983- 90 MID TERR': 'mid-terrace',
+    '1976-82 MID TERR': 'mid-terrace',
+    '1983-90 END TERR': 'end-terrace',
+    '1991-95 SEMI DET': 'semi-detached',
+    '1983-90 SEMI DET': 'semi-detached',
+    '1991-95  MID TERR': 'mid-terrace',
+    '1950-66 SEMI DET': 'semi-detached',
+    '1900 MID TERR': 'mid-terrace',
+    '1967-75 SEMI DET': 'semi-detached',
+    '1983- 90 SEMI DET': 'semi-detached',
+    '1983-90 MID TERR': 'mid-terrace',
+    '1976-82 SEMI DET': 'semi-detached',
+    'PRE 1900 MID TERR': 'mid-terrace'
+
 }
--- a/asset_list/mappings/exising_pv.py
+++ b/asset_list/mappings/exising_pv.py
@ -16,5 +16,6 @@ EXISTING_PV_MAPPINGS = {
    'PV: 25% roof area, PV: 3.6kWp array': 'already has PV',
    'PV: 10% roof area, PV: 2kWp array': 'already has PV',
    'PV: 50% roof area': 'already has PV',
-    'Solar PV': 'already has PV'
+    'Solar PV': 'already has PV',
+    'SOLAR PV': 'already has PV'
 }
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@ -27,7 +27,7 @@ STANDARD_HEATING_SYSTEMS = {
    "electric ceiling",
    "electric underfloor",
    "no heating",
-    "non-electric underfloor"
+    "non-electric underfloor",
 }

 HEATING_MAPPINGS = {
@ -292,4 +292,39 @@ HEATING_MAPPINGS = {
    'Communal Heating': 'communal heating',
    'No Data': 'unknown',
    'Boiler System': 'gas condensing boiler',
+    'Storage heating': 'electric storage heaters',
+    'Storage heating (HHRSH)': 'high heat retention storage heaters',
+
+    'ELECTRIC BOILER': 'electric boiler',
+    'STORAGE HEATERS': 'electric storage heaters',
+    'GREENSTAR 24I JUNIOR': 'gas combi boiler',
+    'generic cond combi post98': 'gas condensing combi',
+    'SAP TABLE REG COND +98 NO PICTURE OF BOILER': 'gas condensing boiler',
+    'ECO TEC PRO 28 H COMBI A': 'gas combi boiler',
+    'GREENSTAR 25I ErP': 'gas combi boiler',
+    'IDEAL LOGIC MAX COMBI C30': 'gas combi boiler',
+    'ECO TEC PRO 28 (286/5-3)': 'gas combi boiler',
+    'IDEAL LOGIC HEAT 30': 'gas boiler, radiators',
+    'WORCESTER 240': 'gas boiler, radiators',
+    'ECO TEC PRO 24 (246/5-3)': 'gas combi boiler',
+    'ECO TEC PRO 28 (OLD)': 'gas combi boiler',
+    'LOGIC COMBI2 C30': 'gas combi boiler',
+    'GREENSTAR 28I JUNIOR': 'gas combi boiler',
+    'WORCESTER 24i': 'gas combi boiler',
+    'GREENSTAR 30I ErP': 'gas combi boiler',
+    '25 CDI': 'gas combi boiler',
+    'GREENSTAR 28CDI COMPACT ErP': 'gas combi boiler',
+    'GREENSTAR 24 RI': 'gas boiler, radiators',
+    'BAXI COMBI 105 HE': 'gas combi boiler',
+    'ECO TEC PRO 28 (OLD TYPE)': 'gas combi boiler',
+    'WORCESTER 28 SI ll RSF': 'gas combi boiler',
+    'GREENSTAR 30SI COMPACT ErP': 'gas combi boiler',
+    'SAP TABLE REG COND +98 NO PICTURE OF CYLINDER': 'gas condensing boiler',
+    'WORCESTER 24 SI ll RSF': 'gas combi boiler',
+    'GREENSTAR 4000': 'gas combi boiler',
+    'GREENSTAR 24i JUNIOR': 'gas combi boiler',
+    'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler',
+    'GREENSTAR 30SI COMPACT': 'gas combi boiler',
+    'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler',
+    'Not applicable for this asset type': 'unknown'
 }
--- a/asset_list/mappings/property_type.py
+++ b/asset_list/mappings/property_type.py
@ -252,5 +252,19 @@ PROPERTY_MAPPING = {
    'Bedsit bungalow semi detached': 'bedsit',
    'Bedsit Flat': 'bedsit',
    'Semi detached house': 'house',
-    'Unit': 'unknown'
+    'Unit': 'unknown',
+    'HOUSE (3 STOREY)': 'house',
+    'FLAT GROUND FLOOR': 'flat',
+    'FLAT TOP FLOOR': 'flat',
+
+    'SHARED HOUSE': 'house',
+    'MAISONETTE': 'maisonette',
+    'DIRECT ACCESS HOSTEL': 'other',
+    'Day centre': 'other',
+    'Care home': 'other',
+    'BLOCK (Communal)': 'block of flats',
+    'SHOP': 'other',
+    'Office Block': 'other',
+    'BLOCK (Non-Communal)': 'block of flats',
+    'Refuge': 'other'
 }
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
    "pitched unknown access to loft",
    "piched unknown insulation",
    "pitched insulated",
-    "pitched less than 100mm insulation"
+    "pitched less than 100mm insulation",
    "another dwelling above",
    "flat unknown insulation",
    "unknown insulated",
@ -38,4 +38,18 @@ ROOF_CONSTRUCTION_MAPPINGS = {
    '200mm': 'pitched insulated',
    '0-49mm': 'pitched less than 100mm insulation',
    '50mm': 'pitched less than 100mm insulation',
+    '': 'unknown',
+    'NR': 'unknown',
+    'Non-joist': 'unknown',
+    '25mm': 'pitched less than 100mm insulation',
+    '400mm+': 'pitched insulated',
+    '12mm': 'pitched less than 100mm insulation',
+
+    '150MM': 'pitched insulated',
+    '200MM': 'pitched insulated',
+    '250MM': 'pitched insulated',
+    '100MM': 'pitched less than 100mm insulation',
+    'U/K': 'unknown',
+    'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation',
+    'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation'
 }
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@ -224,5 +224,31 @@ WALL_CONSTRUCTION_MAPPINGS = {
    'Traditional Cavity Brickwork': 'cavity unknown insulation',
    'System build (undefined)': 'system built',
    'Non Trad Wimpey': 'system built',
-    'Non Trad Wates': 'system built'
+    'Non Trad Wates': 'system built',
+
+    'CAVITY FILLED 270MM': 'filled cavity',
+    'CAVITY FILLED  270MM': 'filled cavity',
+    'CAVITY FILLED  250MM': 'filled cavity',
+    'CAVITY FILLED  260MM': 'filled cavity',
+    'CAVITY FILLED 260MM': 'filled cavity',
+    'SOLID A/B 220MM': 'solid brick unknown insulation',
+    'CAVITY A/B 300MM': "uninsulated cavity",
+    'CAVITY A/B 250MM': "uninsulated cavity",
+    'CAVITY A/B  260MM': "uninsulated cavity",
+    'CAVITY A/B  270MM': "uninsulated cavity",
+    'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation',
+    'CAVITY EWI': 'filled cavity',
+    'SANDSTONE/CAVITY EXT': 'sandstone or limestone',
+    'SYSTEM BUILD 100MM EWI': 'system built',
+    'CAVITY A/B 260MM': "uninsulated cavity",
+    'CAVITY A/B 270MM': "uninsulated cavity",
+    'CAVITY A/B  250MM': "uninsulated cavity",
+    'System': 'system built',
+    'Sandstone/Limestone': 'sandstone or limestone',
+    'No Fines': 'system built',
+    'Granite/Whinstone': 'granite or whinstone',
+    'Not applicable to this asset type': 'unknown',
+    'Steel Frame': 'system built',
+    'Solid Wall As Built': 'uninsulated solid brick',
+    'Solid As Built': 'uninsulated solid brick'
 }
--- a/asset_list/utils.py
+++ b/asset_list/utils.py
@ -79,7 +79,13 @@ def get_data(
                uprn=uprn
            )
            # Force the skipping of estimating the EPC
-            searcher.ordnance_survey_client.property_type = None
+            # We check if the property was split
+            if home.get("is_expended_block"):
+                searcher.ordnance_survey_client.property_type = "Flat"
+                searcher.property_type = "Flat"
+                searcher.set_strict_property_type_search()
+            else:
+                searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None

            searcher.find_property(skip_os=True)
--- a/backend/Funding.py
+++ b/backend/Funding.py
@ -5,7 +5,7 @@ from typing import List
 from backend.app.plan.schemas import HousingType


-class Funding:
+class FundingOld:
    """
    Given a property, this class identifies if the home is possibly eligible for funding under
    the various funding schemes. It will also calculate the expected amount of funding available
@ -411,3 +411,232 @@ class Funding:
        self.gbis()
        # self.eco4()
        self.whlg()
+
+
+class Funding:
+    """
+    New class to handle funding calculation.
+
+    Stores the ABS rates, the ECO4 full project scores matrix and the WHLG eligible postcodes, and uses them
+    to check whether a package of measures is fundable. Only the ECO4 route for Private tenure is implemented.
+    """
+
+    def __init__(
+        self,
+        tenure: HousingType,
+        social_cavity_abs_rate: float,
+        social_solid_abs_rate: float,
+        private_cavity_abs_rate: float,
+        private_solid_abs_rate: float,
+        project_scores_matrix,
+        whlg_eligible_postcodes
+    ):
+        """
+        :param tenure: tenure of the property, check_funding compares it against "Private" and "Social"
+        :param social_cavity_abs_rate: ABS rate for social cavity wall properties (stored, not used yet)
+        :param social_solid_abs_rate: ABS rate for social solid wall properties (stored, not used yet)
+        :param private_cavity_abs_rate: multiplier applied to the project ABS for private cavity properties
+        :param private_solid_abs_rate: multiplier applied to the project ABS for private solid wall properties
+        :param project_scores_matrix: DataFrame with the columns "Floor Area Segment", "Starting Band",
+            "Finishing Band" and "Cost Savings"
+        :param whlg_eligible_postcodes: WHLG eligible postcodes (stored, not used yet)
+        """
+        self.tenure = tenure
+        self.social_cavity_abs_rate = social_cavity_abs_rate
+        self.social_solid_abs_rate = social_solid_abs_rate
+        self.private_cavity_abs_rate = private_cavity_abs_rate
+        self.private_solid_abs_rate = private_solid_abs_rate
+
+        # The three bands below are populated by check_funding
+        self.starting_sap_band = None
+        self.ending_sap_band = None
+        self.floor_area_band = None
+        self.project_scores_matrix = project_scores_matrix
+        self.whlg_eligible_postcodes = whlg_eligible_postcodes
+
+    @staticmethod
+    def get_sap_band(sap_score_number):
+        """
+        Converts a SAP score into its half band, e.g. 70 falls in "Low_C"
+        :param sap_score_number: numeric SAP score
+        :return: the band name, or None if the score doesn't fall in any band (e.g. it is below 1)
+        """
+        bands = [
+            ("High_A", 96, float("inf")),
+            ("Low_A", 92, 96),
+            ("High_B", 86, 92),
+            ("Low_B", 81, 86),
+            ("High_C", 74.5, 81),
+            ("Low_C", 69, 74.5),
+            ("High_D", 61.5, 69),
+            ("Low_D", 55, 61.5),
+            ("High_E", 46.5, 55),
+            ("Low_E", 39, 46.5),
+            ("High_F", 29.5, 39),
+            ("Low_F", 21, 29.5),
+            ("High_G", 10.5, 21),
+            ("Low_G", 1, 10.5),
+        ]
+
+        # Lower bounds are inclusive and upper bounds are exclusive
+        for band, lower, upper in bands:
+            if lower <= sap_score_number < upper:
+                return band
+
+        return None
+
+    @staticmethod
+    def get_floor_area_band(floor_area):
+        """
+        Classifies a floor area into a floor area segment, so it can be matched to the project scores matrix
+        :param floor_area: floor area of the property
+        :return: one of "0-72", "73-97", "98-199" or "200+"
+        """
+        if floor_area <= 72:
+            return "0-72"
+
+        if floor_area <= 97:
+            return "73-97"
+
+        if floor_area <= 199:
+            return "98-199"
+
+        # "200+", not "200" - presumably the matrix's label for the open ended segment, verify against the csv
+        return "200+"
+
+    @staticmethod
+    def eco4_prs_eligibility(
+        starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
+    ):
+        """
+        Handles the eligibility criteria for private rental properties under eco
+        :param starting_sap: SAP score of the property before any measures
+        :param measures: list of measure names in the package
+        :param mainheat_description: main heating description, matched case insensitively
+        :param heating_control_description: heating controls description, matched case insensitively
+        :return: True if the package meets the criteria below, False otherwise
+        """
+
+        # Help to heat group
+        # 1) EPC E - G
+        # 2) Must receive one of SWI, FTCH, renewable heating or DHC
+        # 3) Tenant must be on benefits
+
+        # We don't consider the tenant being on benefits - we just notify the end user that this is a requirement
+
+        meets_epc = starting_sap <= 54
+        has_solid_wall = "internal_wall_insulation" in measures or "external_wall_insulation" in measures
+
+        # We check if the property has a heating system that means solar pv counts as a renewable heating system
+        # The descriptions we match against are lower case, so we lower case the main heating description too
+        mainheat = mainheat_description.lower()
+        has_heat_pump_or_electric_boiler = any(
+            x in mainheat
+            for x in ["air source heat pump", "ground source heat pump", "boiler and radiators, electric"]
+        )
+        has_hhr_storage_heaters = (
+            "electric storage heaters" in mainheat
+            and heating_control_description.lower() == "controls for high heat retention storage heaters"
+        )
+        has_eligible_electric_heating = has_heat_pump_or_electric_boiler or has_hhr_storage_heaters
+
+        # Counts as renewable heating
+        solar_renewable_heating = has_eligible_electric_heating and "solar_pv" in measures
+        # Is a renewable heating
+        ashp = "air_source_heat_pump" in measures
+
+        return meets_epc and (solar_renewable_heating or ashp or has_solid_wall)
+
+    def calculate_full_project_abs(self):
+        """
+        Looks up the full project score for the floor area band, starting band and finishing band that are
+        currently set on the instance (check_funding sets them)
+        :return: the "Cost Savings" value of the first matching row of the project scores matrix
+        :raises ValueError: if no row of the project scores matrix matches
+        """
+        # Filter the project scores matrix
+        matrix = self.project_scores_matrix
+        data = matrix[
+            (matrix["Floor Area Segment"] == self.floor_area_band) &
+            (matrix["Starting Band"] == self.starting_sap_band) &
+            (matrix["Finishing Band"] == self.ending_sap_band)
+        ]
+
+        if data.empty:
+            raise ValueError("Missing abs rate, check the project scores matrix")
+
+        return data["Cost Savings"].values[0]
+
+    def check_funding(
+        self, measures: List,
+        starting_sap: int,
+        ending_sap: int,
+        floor_area: float,
+        mainheat_description: str,
+        heating_control_description: str,
+        is_cavity: bool
+    ):
+        """
+        Given a list of measures, this function will check if the package of measures is fundable
+        :param measures: list of measure names in the package
+        :param starting_sap: SAP score before the measures
+        :param ending_sap: SAP score after the measures
+        :param floor_area: floor area of the property, used to pick the floor area band
+        :param mainheat_description: main heating description of the property
+        :param heating_control_description: heating controls description of the property
+        :param is_cavity: Indicates if the property has cavity wall insulation
+        :return: estimated ECO4 funding for an eligible Private property, None if it isn't ECO4 eligible
+        :raises NotImplementedError: for the SAP movements and tenures that aren't handled yet
+        :raises ValueError: if the project scores matrix has no row for the property's bands
+        """
+
+        # If it's an E or D, should get to an EPC C
+        # NOTE(review): this only checks a starting SAP of 55 and above - confirm the threshold intended for E
+        if starting_sap >= 55 and ending_sap < 69:
+            raise NotImplementedError("This property doesn't have sufficient SAP movement")
+
+        # This must be `and`: `&` binds tighter than `<=`, so it would compute `38 & ending_sap` first
+        if starting_sap <= 38 and ending_sap <= 55:
+            # F or G should get to D
+            raise NotImplementedError("Implement F or G to D eligibility")
+
+        self.starting_sap_band = self.get_sap_band(starting_sap)
+        self.ending_sap_band = self.get_sap_band(ending_sap)
+        self.floor_area_band = self.get_floor_area_band(floor_area)
+
+        ########################
+        # Private
+        ########################
+        # 1) ECO4
+        # 2) GBIS
+
+        if self.tenure == "Private":
+            is_eco4_eligible = self.eco4_prs_eligibility(
+                starting_sap=starting_sap,
+                measures=measures,
+                mainheat_description=mainheat_description,
+                heating_control_description=heating_control_description
+            )
+
+            # TODO - GBIS still needs to be implemented
+            # 1) Package has to include an insulation measure
+            # 2) We should use the funding for the measure that has the largest partial project score
+
+            if not is_eco4_eligible:
+                return None
+
+            eco4_abs = self.calculate_full_project_abs()
+            # We estimate rates now
+            abs_rate = self.private_cavity_abs_rate if is_cavity else self.private_solid_abs_rate
+            return eco4_abs * abs_rate
+
+        ########################
+        # Social
+        ########################
+        # 1) ECO4
+        # 2) GBIS
+
+        if self.tenure == "Social":
+            raise NotImplementedError("Funding checks for Social housing are not implemented yet")
+
+        raise NotImplementedError("Only implemented for Private or Social housing")
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@ -160,6 +160,9 @@ class SearchEpc:
        """
        Address lines 1 and postcode are mandatory fields. The other address lines are optional
        but can be used to find the epc for the home, if address1 and postcode are insufficient
+
+        If you wish to run a strict property type search, please run set_strict_property_type_search()
+
        :param address1: string, propery's address line 1
        :param postcode: string, propery's postcode
        :param full_address: string, optional parameter, the full address of the property
@ -189,6 +192,7 @@ class SearchEpc:
        self.older_epcs = None
        self.full_sap_epc = None
        self.metadata = None
+        self.strict_property_type_search = False

        # These are the address and postcode values, which we store in the database
        self.address_clean = None
@ -199,6 +203,14 @@ class SearchEpc:
        self.property_type = property_type
        self.fast = fast

+    def set_strict_property_type_search(self):
+        """
+        This method sets the strict property type search flag to True. When this flag is set, the search will
+        only return results that match the specified property type.
+        :return:
+        """
+        self.strict_property_type_search = True
+
    @staticmethod
    def get_house_number(address: str, postcode=None) -> str | None:
        """
@ -315,6 +327,8 @@ class SearchEpc:
            address_params["address"] = self.address1
        if self.postcode:
            address_params["postcode"] = self.postcode
+        if self.strict_property_type_search and self.property_type:
+            address_params["property-type"] = self.property_type.lower()

        # We attempt the search with uprn params

@ -365,11 +379,16 @@ class SearchEpc:

        unique_property_types = {r["property-type"] for r in rows}

+        is_just_a_house = (len(unique_property_types) == 1) & (
+            ("House" in unique_property_types) | ("Bungalow" in unique_property_types)
+        )
+
        # We allow for variation in property type across flats/maisonettes
        # If we know that we have a flat/maisonette, we allow for both property types
-        if property_type in ["Flat", "Maisonette"]:
-            if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
-            ) or unique_property_types == {"Flat", "Maisonette"}):
+        # Make sure we have not JUST a house, or not JUST a flat/maisonette
+        if property_type in ["Flat", "Maisonette"] and not is_just_a_house:
+            if (((len(uprns) == 1) and ((len(unique_property_types) == 1)
+            ) or unique_property_types == {"Flat", "Maisonette"})):
                return rows

        if property_type is not None:
@ -424,6 +443,8 @@ class SearchEpc:

            return rows

+        raise ValueError("property type and address cannot both be None, at least one must be provided")
+
    @staticmethod
    def format_address(newest_epc):
        """
@ -702,6 +723,18 @@ class SearchEpc:
            exclude_old=exclude_old
        )

+        # Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build
+        # so we avoid comparing it to new builds
+        # TODO - this is experimental
+        newer_age_bands = [
+            "England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011",
+            "England and Wales: 2012 onwards"
+        ]
+
+        if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum():
+            # We have some older age bands, so we keep those and filter out the newer (new build) ones
+            epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy()
+
        # If we have missing lodgment date, we fill it with inspection-date
        epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
        # If we still have missing dates, we set it to the mean of the non NA dates
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@ -507,7 +507,7 @@ async def model_engine(body: PlanTriggerRequest):
            )

            # if we have a remote assment data type, we pull the additional data and include it
-            if body.event_type == "remote_assessment":
+            if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]):
                logger.info("Retrieving find my epc data")
                try:
                    property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc(
--- a/backend/tests/test_funding.py
+++ b/backend/tests/test_funding.py
@ -0,0 +1,52 @@
+import pytest
+import pandas as pd
+from utils.s3 import read_csv_from_s3
+from backend.Funding import Funding
+
+
+def get_funding_data():
+    """
+    This function retrieves the eco project scores matrix and the warm homes local grant funding data
+    :return:
+    """
+    project_scores_matrix = read_csv_from_s3(
+        bucket_name="retrofit-data-dev",
+        filepath="funding/ECO4 Full Project Scores Matrix.csv",
+    )
+    project_scores_matrix = pd.DataFrame(project_scores_matrix)
+    project_scores_matrix.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings']
+    project_scores_matrix["Cost Savings"] = project_scores_matrix["Cost Savings"].astype(float)
+
+    whlg_eligible_postcodes = read_csv_from_s3(
+        bucket_name="retrofit-data-dev",
+        filepath="funding/whlg eligible postcodes.csv",
+    )
+    whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes)
+
+    return project_scores_matrix, whlg_eligible_postcodes
+
+
+class TestFunding:
+
+    def test_prs(self):
+        eco_project_scores_matrix, whlg_eligible_postcodes = get_funding_data()
+        funding = Funding(
+            project_scores_matrix=eco_project_scores_matrix,
+            whlg_eligible_postcodes=whlg_eligible_postcodes,
+            social_cavity_abs_rate=13.5,
+            social_solid_abs_rate=17,
+            private_cavity_abs_rate=13.5,
+            private_solid_abs_rate=17,
+            tenure="Private",
+        )
+
+        measures_1 = ["internal_wall_insulation", "solar_pv"]
+        funding.check_funding(
+            measures=measures_1,
+            starting_sap=54,
+            ending_sap=69,
+            floor_area=73,
+            mainheat_description="Boiler and radiators, mains gas",
+            heating_control_description="Programmer, room thermostat and TRVs",
+            is_cavity=True
+        )
--- a/Housing/validation_surveys.py
+++ b/Housing/validation_surveys.py
@ -0,0 +1,167 @@
+import pandas as pd
+
+
+def get_band(sap_score_number):
+    bands = [
+        ("High_A", 96, float("inf")),
+        ("Low_A", 92, 96),
+        ("High_B", 86, 92),
+        ("Low_B", 81, 86),
+        ("High_C", 74.5, 81),
+        ("Low_C", 69, 74.5),
+        ("High_D", 61.5, 69),
+        ("Low_D", 55, 61.5),
+        ("High_E", 46.5, 55),
+        ("Low_E", 39, 46.5),
+        ("High_F", 29.5, 39),
+        ("Low_F", 21, 29.5),
+        ("High_G", 10.5, 21),
+        ("Low_G", 1, 10.5),
+    ]
+
+    for band, lower, upper in bands:
+        if lower <= sap_score_number < upper:
+            return band
+
+    return None
+
+
+def classify_floor_area(floor_area):
+    if floor_area <= 72:
+        return "0-72"
+
+    if floor_area <= 97:
+        return "73-97"
+
+    if floor_area <= 199:
+        return "98-199"
+
+    return "200+"
+
+
+asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/ECO 4 Wates - Standardised.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+asset_list["starting_sap_band"] = asset_list["epc_sap_score_on_register"].apply(get_band)
+asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(classify_floor_area)
+
+# Objective:
+# We need to get a reasonable estimate for the cost of works for properties that are EPC D or below
+#
+# Therefore:
+# 1) We know that some properties that are currently EPC C may* qualify for ECO4 funding. Right now, we aren't trying
+# to determine which EPC C properties or above will qualify, just how much works will cost for properties that do
+# qualify
+# 2) We cannot survey everything, so before we undertake too much risk we should produce some costings for each of the
+# archetypes
+#
+# Driving Factors:
+# 1) Floor area band & starting SAP band - this will determine how much funding is produced
+# 2) Heating system - this will determine if the property needs a heating upgrade or not
+
+
+archetypes = asset_list[asset_list["epc_sap_score_on_register"] <= 68].groupby(
+    ["floor_area_band", "starting_sap_band", "landlord_heating_system"]
+)["landlord_property_id"].nunique().reset_index()
+archetypes = archetypes.rename(columns={"landlord_property_id": "n_properties"})
+archetypes = archetypes.sort_values("n_properties", ascending=False)
+archetypes["running_total"] = archetypes["n_properties"].cumsum()
+archetypes["cumulative_percentage"] = archetypes["running_total"] / archetypes["n_properties"].sum() * 100
+
+archetypes["is_electric"] = archetypes["landlord_heating_system"] != "boiler - other fuel"
+archetypes["needs_heating_upgrade"] = archetypes["landlord_heating_system"].isin(
+    ["boiler - other fuel", "electric storage heaters"]
+)
+archetypes = archetypes.reset_index(drop=True)
+
+# Right now, they don't want to treat the oil properties so we'll exclude them for the moment
+electric_heated_archetypes = (
+    archetypes[archetypes["landlord_heating_system"] != "boiler - other fuel"].copy().reset_index(drop=True)
+)
+electric_heated_archetypes["running_total"] = electric_heated_archetypes["n_properties"].cumsum()
+electric_heated_archetypes["cumulative_percentage"] = (
+    electric_heated_archetypes["running_total"] / electric_heated_archetypes["n_properties"].sum() * 100
+)
+
+archetypes["archetype_id"] = archetypes.index  # must exist before the merge and the oil copy below use it
+
+# The main properties that need validation surveys are properties that require a heating upgrade
+electric_heated_archetypes = electric_heated_archetypes[electric_heated_archetypes["needs_heating_upgrade"]]
+electric_heated_archetypes = electric_heated_archetypes.merge(
+    archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
+    how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"]
+)
+
+oil_archetypes = archetypes[
+    archetypes["landlord_heating_system"] == "boiler - other fuel"
+    ].copy().reset_index(drop=True)
+
+asset_list = asset_list.merge(
+    archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
+    how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"]
+)
+
+properties_for_verification = asset_list[
+    asset_list["archetype_id"].isin(electric_heated_archetypes["archetype_id"].values)
+].copy()
+properties_for_verification["postal_region"] = properties_for_verification["domna_postcode"].str.split(" ").str[
+    0].str.strip()
+
+properties_for_verification["epc_age"] = (
+    pd.Timestamp.now() - pd.to_datetime(properties_for_verification["epc_inspection_date"])
+).dt.days
+
+# We also survey 2 oil heater properties, so we take the 2 most prevalent archetypes
+archetypes_for_survey = pd.concat(
+    [electric_heated_archetypes, oil_archetypes.head(2)]
+)
+
+# Take the property with the oldest EPC, by region. Prioritise estimated properties
+sample = []
+for _, config in archetypes_for_survey.iterrows():
+    properties = asset_list[
+        (asset_list["archetype_id"] == config["archetype_id"]) &
+        (asset_list["floor_area_band"] == config["floor_area_band"]) &
+        (asset_list["starting_sap_band"] == config["starting_sap_band"])
+        ]
+
+    if pd.isnull(properties["epc_inspection_date"]).sum():
+        sample_property = properties[pd.isnull(properties["epc_inspection_date"])].head(1).to_dict("records")
+    else:
+        # Take the property with the oldest EPC
+        sample_property = properties.sort_values("epc_inspection_date", ascending=True).head(1).to_dict("records")
+
+    sample.extend(sample_property)
+
+sample = pd.DataFrame(sample)
+
+sample = sample[
+    [
+        "landlord_property_id", "epc_inspection_date", "epc_sap_score_on_register", "starting_sap_band",
+        "floor_area_band", "landlord_heating_system", "domna_postcode", "domna_full_address", "archetype_id"
+    ]
+]
+
+archetypes = asset_list[["landlord_property_id", "archetype_id"]].copy()
+archetypes["archetype_id"] = archetypes["archetype_id"].astype(str)
+
+filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/archetypes.xlsx"
+# Store the data in two tabs. One with the archetype id of each property and the second with the survey sample
+
+with pd.ExcelWriter(filename) as writer:
+    archetypes.to_excel(writer, sheet_name="Archetypes", index=False)
+    sample.to_excel(writer, sheet_name="Survey Sample", index=False)
+
+# We store this
+
+# Questions:
+# 1) If futures are considering changing properties that have oil heating systems, we could include them and
+# we have 39 total archetypes. Otherwise, we have 25 archetypes
+# 2) Can futures provide us with any information on the model of air source heat pumps and associated controls they're
+# using
+
+# Recommendations:
+# 1) If they are willing to upgrade the heating systems of the oil properties, surveying 18 properties will cover
+#
--- a/etl/customers/cambridge/surveys.py
+++ b/etl/customers/cambridge/surveys.py
@ -0,0 +1,24 @@
+import pandas as pd
+from backend.ml_models.Valuation import PropertyValuation
+from backend.app.utils import sap_to_epc
+
+# Read in the survey data
+surveys = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/Survey Data.xlsx",
+    sheet_name="Survey data",
+)
+
+increases = []
+for _, x in surveys.iterrows():
+    current_epc = sap_to_epc(x["Pre SAP"])
+    target_epc = sap_to_epc(x["Scenario 1 Post SAP"])
+    current_value = x["Valuation"]
+
+    val = PropertyValuation.estimate_valuation_improvement(
+        current_value,
+        current_epc,
+        target_epc,
+        total_cost=None
+    )
+    avg_increase = val["average_increase"]
+    increases.append(round(avg_increase))
--- a/etl/customers/l_and_g/risk_matrix.py
+++ b/etl/customers/l_and_g/risk_matrix.py
@ -81,6 +81,7 @@ def app():
    # We need to calculate the costs
    cost_data = []
    for _, row in epr_data.iterrows():
+
        epc = row["EPC"][0]
        sap = int(row["EPC"][1:])

--- a/etl/customers/places_for_people/abs.py
+++ b/etl/customers/places_for_people/abs.py
@ -0,0 +1,199 @@
+"""
+This script is to calculate the ABS for the Places for People London project
+"""
+
+import os
+import pandas as pd
+
+# London
+pfp_london_cav = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx",
+    sheet_name="Cav Route",
+    header=1
+)
+pfp_london_cav = pfp_london_cav.rename(columns={"Route": "Route March"})
+pfp_london_pv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx",
+    sheet_name="PV Route",
+    header=1
+)
+pfp_london_pv = pfp_london_pv.rename(columns={"Route": "Route March"})
+pfp_london_cav["location"] = "London"
+pfp_london_pv["location"] = "London"
+# East
+pfp_east_cav = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_east_reviewed_standarised_15052025.xlsx",
+    sheet_name="Cav Route",
+    header=1
+)
+pfp_east_cav = pfp_east_cav.rename(columns={"Route": "Route March"})
+pfp_east_pv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_east_reviewed_standarised_15052025.xlsx",
+    sheet_name="PV Route",
+    header=1
+)
+pfp_east_pv = pfp_east_pv.rename(columns={"Route": "Route March"})
+pfp_east_cav["location"] = "East"
+pfp_east_pv["location"] = "East"
+# North east
+pfp_north_east_cav = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_north_east_reviewed_standardised_15052025.xlsx",
+    sheet_name="Cav Route",
+    header=1
+)
+pfp_north_east_cav = pfp_north_east_cav.rename(columns={"Route": "Route March"})
+pfp_north_east_pv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_north_east_reviewed_standardised_15052025.xlsx",
+    sheet_name="PV Route",
+    header=1
+)
+pfp_north_east_pv = pfp_north_east_pv.rename(columns={"Route": "Route March"})
+pfp_north_east_cav["location"] = "North East"
+pfp_north_east_pv["location"] = "North East"
+# North West
+pfp_north_west_cav = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_north_west_reviewed_standardised_15052025.xlsx",
+    sheet_name="Cav Route",
+    header=1
+)
+pfp_north_west_cav = pfp_north_west_cav.rename(columns={"Route": "Route March"})
+pfp_north_west_pv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs "
+    "rates/PFP_north_west_reviewed_standardised_15052025.xlsx",
+    sheet_name="PV Route",
+    header=1
+)
+pfp_north_west_pv = pfp_north_west_pv.rename(columns={"Route": "Route March"})
+pfp_north_west_cav["location"] = "North West"
+pfp_north_west_pv["location"] = "North West"
+
+cav_route = pd.concat(
+    [
+        pfp_london_cav,
+        pfp_east_cav,
+        pfp_north_east_cav,
+        pfp_north_west_cav
+    ]
+)
+solar_route = pd.concat(
+    [
+        pfp_london_pv,
+        pfp_east_pv,
+        pfp_north_east_pv,
+        pfp_north_west_pv
+    ]
+)
+
+
+def get_band(sap_score_number):
+    bands = [
+        ("High_A", 96, float("inf")),
+        ("Low_A", 92, 96),
+        ("High_B", 86, 92),
+        ("Low_B", 81, 86),
+        ("High_C", 74.5, 81),
+        ("Low_C", 69, 74.5),
+        ("High_D", 61.5, 69),
+        ("Low_D", 55, 61.5),
+        ("High_E", 46.5, 55),
+        ("Low_E", 39, 46.5),
+        ("High_F", 29.5, 39),
+        ("Low_F", 21, 29.5),
+        ("High_G", 10.5, 21),
+        ("Low_G", 1, 10.5),
+    ]
+
+    for band, lower, upper in bands:
+        if lower <= sap_score_number < upper:
+            return band
+
+    return None
+
+
+def classify_floor_area(floor_area):
+    if floor_area <= 72:
+        return "0-72"
+
+    if floor_area <= 97:
+        return "73-97"
+
+    if floor_area <= 199:
+        return "98-199"
+
+    return "200+"
+
+
+# We classify the abs bounds
+solar_route["starting_abs_band"] = solar_route["epc_sap_score_on_register"].apply(get_band)
+solar_route["ending_abs_band_scenario1"] = "High_C"
+solar_route["ending_abs_band_scenario2"] = "Low_B"
+solar_route["epc_total_floor_area"] = solar_route["epc_total_floor_area"].fillna(90)
+solar_route["floor_area_band"] = solar_route["epc_total_floor_area"].apply(classify_floor_area)
+
+# We classify the abs bounds
+cav_route["epc_sap_score_on_register"] = cav_route["epc_sap_score_on_register"].fillna(68)
+cav_route["starting_abs_band"] = cav_route["epc_sap_score_on_register"].apply(get_band)
+cav_route["floor_area_band"] = cav_route["epc_total_floor_area"].apply(classify_floor_area)
+cav_route["ending_abs_band"] = "Low_C"
+
+abs_matrix = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
+)
+
+cav_route = cav_route.merge(
+    abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}),
+    how="left",
+    left_on=["starting_abs_band", "ending_abs_band", "floor_area_band"],
+    right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
+)
+solar_route = solar_route.merge(
+    abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}),
+    how="left",
+    left_on=["starting_abs_band", "ending_abs_band_scenario1", "floor_area_band"],
+    right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
+)
+cav_route["ABS Rate"] = cav_route["ABS Rate"].fillna(0)
+solar_route["ABS Rate"] = solar_route["ABS Rate"].fillna(0)
+
+cav_abs_agg = (
+    cav_route.groupby("Route March").agg(
+        {
+            "ABS Rate": "sum",
+            "landlord_property_id": "count",
+        }
+    ).reset_index()
+)
+cav_abs_agg["Week Number"] = cav_abs_agg["Route March"].str.extract(r"(\d+)").astype(int)
+cav_abs_agg = cav_abs_agg.sort_values("Week Number", ascending=True)
+cav_abs_agg = cav_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"})
+
+solar_abs_agg = (
+    solar_route.groupby("Route March").agg(
+        {
+            "ABS Rate": "sum",
+            "landlord_property_id": "count",
+        }
+    ).reset_index()
+)
+solar_abs_agg["Week Number"] = solar_abs_agg["Route March"].str.extract(r"(\d+)").astype(int)
+solar_abs_agg = solar_abs_agg.rename(columns={"landlord_property_id": "Number of Properties"})
+solar_abs_agg = solar_abs_agg.sort_values("Week Number", ascending=True)
+
+# We store the data
+# Store as an excel
+filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates/pfp programme rates.xlsx"
+# Store the data in four tabs. The aggregated ABS for the solar and cavity routes, then the data for each route
+
+with pd.ExcelWriter(filename) as writer:
+    solar_abs_agg.to_excel(writer, sheet_name="Solar ABS", index=False)
+    cav_abs_agg.to_excel(writer, sheet_name="Cav ABS", index=False)
+
+    cav_route.to_excel(writer, sheet_name="Cavity data", index=False)
+    solar_route.to_excel(writer, sheet_name="Solar data", index=False)
--- a/etl/customers/thrive/Programme
+++ b/etl/customers/thrive/Programme
@ -8,6 +8,8 @@ address the following concerns:
 """

 import pandas as pd
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc

 # This is Thrive's list of properties and when they should have been surveyed
 thrive_tracker = pd.read_excel(
@ -51,27 +53,10 @@ original_columns = {
 }

 original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns)
-original_asset_list["Data Source"] = "Thrive Tracker"
+original_asset_list["Data Source"] = "Original Asset List"
+original_asset_list = original_asset_list.drop_duplicates()

 # We append on the missed properties, with the information we have
-# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#',
-#        'Adress Line 1', 'Postcode', 'Property Type', 'Build Form',
-#        'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number',
-#        'Secondary Number', 'Email', 'Thrive use: Tenancy Number',
-#        'Special Requirements ', 'CIGA', 'Date CIGA check received',
-#        'Proposed Progamme', 'New Proposed Programme',
-#        'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type',
-#        'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation',
-#        'Cost for Vents', 'Cavity Depth', 'Cavity Condition',
-#        'Date Submitted to installer', 'PRRN Number',
-#        'Loft insulation required? (Thrive)', 'Date booked ',
-#        'Completed\n(yes/no)', 'Date Completed',
-#        'Vents installed?\n(number and location)',
-#        'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ',
-#        'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added',
-#        'SA Cavity Condition Updated', 'SA Loft & Energy Updated',
-#        'PRRN Submitted '
-
 missed_properties["Full Address"] = (
    missed_properties["#"].astype(str) + ", " +
    missed_properties["Adress Line 1"].astype(str) + ", " +
@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected"
 missed_properties["ECO Eligibility"] = "Property Not Inspected"
 missed_properties["Data Source"] = "Thrive Tracker"

+# We collect the rows of original_asset_list whose thrive_property_id appears more than once, sorted so that the
+# repeats sit next to each other. `dupes` is for inspection only: nothing in this section drops rows based on it.
+dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique()
+dupes = original_asset_list[
+    original_asset_list["thrive_property_id"].isin(dupe_ids)
+].copy()
+dupes = dupes.sort_values("thrive_property_id")
+
+# Renamed ahead of the concat below - presumably so the column lines up with a build_form column in
+# missed_properties (TODO confirm)
+original_asset_list = original_asset_list.rename(
+    columns={
+        "detailed_property_type": "build_form"
+    }
+)
+
 master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True)

 # We were provided with a data update for a sample of properties. We update the data with this information
@ -103,12 +101,339 @@ data_update = pd.read_excel(
    header=0
 )

-new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)]
+# Properties in the data update whose UPRN is not already a thrive_property_id in the master list are treated as new
+new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy()
+new_properties["Full Address"] = (
+    new_properties["#"].astype(str) + ", " +
+    new_properties["Adress Line 1"].astype(str) + ", " +
+    new_properties["Postcode"].astype(str)
+)
+# We keep the same columns, under the same names, as for the missed properties
+new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns)
+# The new properties get the same placeholder findings as the missed properties
+new_properties["WFT Findings"] = "Property Not Inspected"
+new_properties["ECO Eligibility"] = "Property Not Inspected"
+new_properties["Data Source"] = "13.05.2025 Data Update"
+
+# The new properties go first; note that the index is not reset by this concat
+master_list = pd.concat([new_properties, master_list])
+
+# We append any new data on heating system, heating type, and insulation type, based on the data update
+master_list = master_list.merge(
+    data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename(
+        columns={
+            "Heating Type": "heating_type_updated",
+            "Assumed mm ": "assumed_loft_insulation_thickness_updated",
+            "SAP": "sap_rating_updated"
+        }
+    ),
+    how="left",
+    left_on="thrive_property_id",
+    right_on="UPRN"
+)
+
+# We fill the missings
+master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"])
+master_list["assumed_loft_insulation_thickness_updated"] = master_list[
+    "assumed_loft_insulation_thickness_updated"
+].fillna(master_list["assumed_loft_insulation_thickness"])
+master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"])
+
+# Sanity check: every property must appear exactly once in the master list after the concat and merge above.
+# NOTE: assert statements are skipped when Python runs with -O.
+assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list"
+
+# Flag whether each property also appears in the Thrive tracker (ids are compared as strings)
+master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin(
+    thrive_tracker["UPRN"].astype(str).values
+)
+
+# Store the asset list - call it the master asset list, updated May 2025 (the export itself is currently disabled)
+# UPRN was only needed as the merge key for the data update
+master_list = master_list.drop(columns=["UPRN"])
+master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str)
+# master_list.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+#     "Complete - Updated May 2025.xlsx",
+# )
+
+# Temporary house number column, used further down to match the outcomes and submission sheets to the master list
+master_list["house_number_TEMP"] = master_list.apply(
+    lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]),
+    axis=1
+)
+
+# We add in the status of the property
+# TODO: Add the status of the property from the Thrive tracker
+outcomes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April "
+    "24-March25.xlsx",
+    header=0
+)
+# row_id lets us join the matched property ids back onto the outcomes sheet later
+outcomes["row_id"] = outcomes.index
+
+# We have ids which have the same phone number, but different UPRN, so we don't match to the tracker for these
+# NOTE(review): this comment originally said "two ids", but four UPRNs are excluded below - confirm all four belong
+tracker_for_matching = thrive_tracker[
+    ~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1'])
+].copy()
+# Same "<number>, <address line 1>, <postcode>" format as the Full Address built for the other tracker extracts
+tracker_for_matching["Full Address"] = (
+    tracker_for_matching["#"].astype(str) + ", " +
+    tracker_for_matching["Adress Line 1"].astype(str) + ", " +
+    tracker_for_matching["Postcode"].astype(str)
+)
+
+outcomes_id_lookup = []
+for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
+
+    hn = str(x["No."])
+    address = x["Address"]
+    postcode = x["Postcode"]
+    contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"])
+    contact_no = None if contact_no == "nan" else contact_no
+
+    if address == "292 Micklefield Road":
+        hn = "292"
+
+    if (address == "Micklefield Road") & (hn == "302"):
+        hn = "292"
+
+    if (address == "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+        hn = "103a"
+
+    if (address == "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+        hn = "105a"
+
+    if (address == "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY"):
+        hn = "107a"
+
+    #
+    # # We match this to the tracker
+    # m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no]
+    # # Many of the phone numbers don't have a leading zero in the tracker so we add them
+    # if (m1.shape[0] != 1) and not pd.isnull(contact_no):
+    #     m1 = tracker_for_matching[tracker_for_matching["Primary Number"].astype(str) == contact_no.lstrip("0")]
+    # 
+    # if m1.shape[0] > 1:
+    #     raise ValueError(
+    #         f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+    #     )
+
+    # if m1.empty:
+    m1 = tracker_for_matching[
+        (tracker_for_matching["#"].astype(str) == hn) &
+        (tracker_for_matching["Postcode"] == postcode)
+        ]
+
+    if m1.empty:
+        # Some properties aren't in the tracker, we match to the master list
+        m1 = master_list[
+            (master_list["house_number_TEMP"].astype(str) == hn) &
+            (master_list["postcode"] == postcode)
+            ]
+        outcomes_id_lookup.append(
+            {
+                "row_id": x["row_id"],
+                "thrive_property_id": m1["thrive_property_id"].values[0],
+                "address": m1["full_address"].values[0],
+                "postcode": m1["postcode"].values[0],
+            }
+        )
+        continue
+
+    if m1.shape[0] != 1:
+        raise ValueError(
+            f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
+        )
+
+    # We add the status to the master list
+    outcomes_id_lookup.append(
+        {
+            "row_id": x["row_id"],
+            "thrive_property_id": m1["UPRN"].values[0],
+            "address": m1["Full Address"].values[0],
+            "postcode": m1["Postcode"].values[0],
+        }
+    )
+
+outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup)
+outcomes = outcomes.merge(
+    outcomes_id_lookup,
+    how="left",
+    left_on="row_id",
+    right_on="row_id"
+)
+
+outcomes = outcomes.drop(columns=["row_id"])
+outcomes = outcomes.rename(
+    columns={
+        "Outcomes": "Outcome",
+        "Notes                                                                                         (If 'no "
+        "answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes",
+    }
+)


+# Store the corrected outcomes
+# outcomes.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes -
+#     April 24-March25 - Corrected.xlsx",
+#     index=False
+# )

-data_update = = data_update[["UPRN", ""]]

-# TODO: Flag the Thrive priorities and create a separate project code for these
-# TODO: Add the general project code
-# TODO: Add the thrive
+def parse_date(value):
+    # Strip any 'W.C' or 'w/c' prefix and clean whitespace
+    value = value.strip().lower().replace('w.c', '').replace('w/c', '').strip()
+    try:
+        # Try parsing the date with dayfirst=True
+        return pd.to_datetime(value, dayfirst=True, errors='coerce')
+    except Exception:
+        return pd.NaT
+
+
+# Parse the free-text "Date letters sent" column into proper dates
+outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date)
+
+# Next step - match the submissions master to the asset list. We will append on the UPRN
+eco3_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO3.csv",
+    header=0
+)
+# row_id is used to join the matched property ids back onto the sheet
+eco3_submissions["row_id"] = eco3_submissions.index
+
+eco4_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO4.csv",
+    header=0
+)
+eco4_submissions["row_id"] = eco4_submissions.index
+
+# List of properties never on the asset list
+# Each entry is a "<house number>+<street>+<postcode>" key, written exactly as it appears in the submission sheets.
+# Submission rows whose key is listed here are skipped when matching.
+not_on_master = [
+    "7+FOXGROVE PATH+WD19 6YL", "9+FOXGROVE PATH+WD19 6YL", "11+FOXGROVE PATH+WD19 6YL",
+    "20+LINCOLN DRIVE+WD19 7BA", "22+LINCOLN DRIVE+WD19 7BA", "24+LINCOLN DRIVE+WD19 7BA",
+    "26+LINCOLN DRIVE+WD19 7BA", "1+Ryman Court, Stag Lane+WD3 5HN", "6+Ryman Court, Stag Lane+WD3 5HN",
+    "9+Ryman Court, Stag Lane+WD3 5HN", "10+Ryman Court, Stag Lane+WD3 5HN", "11+Ryman Court, Stag Lane+WD3 5HN",
+    "12+Ryman Court, Stag Lane+WD3 5HN", "14+Ryman Court, Stag Lane+WD3 5HN", "15+Ryman Court, Stag Lane+WD3 5HN",
+    "20+Ryman Court, Stag Lane+WD3 5HN", "21+Ryman Court, Stag Lane+WD3 5HN", "22+Ryman Court, Stag Lane+WD3 5HN",
+    "25+Ryman Court, Stag Lane+WD3 5HN", "26+Ryman Court, Stag Lane+WD3 5HN", "31+Ryman Court, Stag Lane+WD3 5HN",
+    "33+Ryman Court, Stag Lane+WD3 5HN", "34+Ryman Court, Stag Lane+WD3 5HN",
+    '37+Ryman Court, Stag Lane+WD3 5HN', '38+Ryman Court, Stag Lane+WD3 5HN', '39+Ryman Court, Stag Lane+WD3 5HN',
+    '41+Ryman Court, Stag Lane+WD3 5HN', '43+Ryman Court, Stag Lane+WD3 5HN', '45+Ryman Court, Stag Lane+WD3 5HN',
+    '46+Ryman Court, Stag Lane+WD3 5HN', '48+Ryman Court, Stag Lane+WD3 5HN', '49+Ryman Court, Stag Lane+WD3 5HN',
+    '50+Ryman Court, Stag Lane+WD3 5HN', '52+Ryman Court, Stag Lane+WD3 5HN'
+]
+
+# Corrections for submission rows whose house number or postcode does not match the master list as typed.
+# The key is the "<house number>+<street>+<postcode>" string as it appears in the sheet; the value is the
+# (house number, street, postcode) to match on instead. Despite the name, this is applied to both the ECO3 and
+# the ECO4 submissions.
+eco3_remap = {
+    "19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'),
+    "29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'),
+    "31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'),
+    "44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'),
+    "64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'),
+    "11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'),
+    "16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'),
+    "58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'),
+    "10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'),
+    "25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+    "32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
+    "94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'),
+    '33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'),
+    '120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'),
+    '35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'),
+    '18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'),
+    '34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'),
+    '58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'),
+    '48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'),
+    '45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'),
+    '6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'),
+    '2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'),
+    '29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'),
+    '61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'),
+    '2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'),
+}
+
+eco3_lookup = []
+for _, row in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)):
+    hn = row["NO "]
+    pc = row["Post Code"]
+    street = row["Street / Block Name"]
+    key = f"{hn}+{street}+{pc}"
+    if key in not_on_master:
+        continue
+
+    if key in eco3_remap:
+        hn, street, pc = eco3_remap[key]
+        # The postcode is different to the asse
+
+    # We filter the asset list, because it's hard to know how accurate this is
+    m1 = master_list[
+        (master_list["house_number_TEMP"].astype(str) == hn) &
+        (master_list["postcode"] == pc)
+        ]
+
+    if m1.shape[0] != 1:
+        raise ValueError(
+            f"Error for {key} in the tracker"
+        )
+
+    eco3_lookup.append(
+        {
+            "row_id": row["row_id"],
+            "thrive_property_id": m1["thrive_property_id"].values[0],
+            "submission_house_number": row["NO "],
+            "submission_address1": row["Street / Block Name"],
+            "submission_postcode": row["Post Code"],
+        }
+    )
+
+eco4_lookup = []
+for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)):
+    hn = row["NO."]
+    pc = row["Post Code"]
+    street = row["Street / Block Name"]
+    key = f"{hn}+{street}+{pc}"
+    if key in not_on_master:
+        continue
+
+    if key in eco3_remap:
+        hn, street, pc = eco3_remap[key]
+        # The postcode is different to the asse
+
+    # We filter the asset list, because it's hard to know how accurate this is
+    m1 = master_list[
+        (master_list["house_number_TEMP"].astype(str) == hn) &
+        (master_list["postcode"].str.lower() == pc.lower())
+        ]
+
+    if m1.shape[0] != 1:
+        raise ValueError(
+            f"Error for {key} in the tracker"
+        )
+
+    eco4_lookup.append(
+        {
+            "row_id": row["row_id"],
+            "thrive_property_id": m1["thrive_property_id"].values[0],
+            "submission_house_number": row["NO."],
+            "submission_address1": row["Street / Block Name"],
+            "submission_postcode": row["Post Code"],
+        }
+    )
+
+# We match the lookups back to the submission sheets
+eco3_lookup = pd.DataFrame(eco3_lookup)
+eco3_submissions = eco3_submissions.merge(
+    eco3_lookup,
+    how="left",
+    on="row_id",
+)
+
+eco4_lookup = pd.DataFrame(eco4_lookup)
+eco4_submissions = eco4_submissions.merge(
+    eco4_lookup,
+    how="left",
+    on="row_id",
+)
+
+# Store
+eco3_submissions.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO3 - with IDS.csv",
+    index=False
+)
+eco4_submissions.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
+    "ECO4 - with IDS.csv",
+    index=False
+)
--- a/etl/customers/thrive/Project
+++ b/etl/customers/thrive/Project
@ -0,0 +1,130 @@
+"""
+This script takes the standardised asset list and appends the project codes.
+We also review the existing install status, in case anything is wrong.
+"""
+import pandas as pd
+import numpy as np
+
+standardised_asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Standardised Asset List",
+)
+
+project_code_allocations = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
+    "Warmfront).xlsx",
+    sheet_name="Master Tracker",
+    header=1
+)
+
+programme_codes = project_code_allocations[
+    ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ]
+].copy()
+programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].copy()
+programme_codes["programme_reference"] = np.where(
+    pd.isnull(programme_codes["programme_reference"]),
+    programme_codes["Proposed Progamme"],
+    programme_codes["programme_reference"]
+)
+
+PROJECT_CODE_MAP = {
+    'Phase 2': "THRIVE-002",
+    'Phase 3': "THRIVE-003",
+    'Phase 4': "THRIVE-004",
+    'Phase 5': "THRIVE-005",
+    'Phase 6': "THRIVE-006",
+    'Phase 7': "THRIVE-007",
+    'Phase 8': "THRIVE-008",
+    'Phase 9': "THRIVE-009",
+    'Phase 10': "THRIVE-010",
+    "Week 1": "THRIVE-WEEK-001",
+    "Week 2": "THRIVE-WEEK-002",
+    "Week 4": "THRIVE-WEEK-004",
+    "Week 7": "THRIVE-WEEK-007",
+}
+programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP)
+
+# Thrive's own notes, priority flag and replies from the master tracker, appended alongside the project codes
+thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy()
+
+# Both merges are left joins, so every row of the asset list is kept. The first matches landlord_property_id to the
+# tracker's UPRN; the second reuses the UPRN column that the first merge brought in.
+standardised_asset_list = standardised_asset_list.merge(
+    programme_codes[["UPRN", "project_code", "programme_reference"]],
+    how="left",
+    left_on="landlord_property_id",
+    right_on="UPRN",
+).merge(
+    thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]],
+    how="left",
+    on="UPRN",
+)
+
+# UPRN was only needed as the join key
+standardised_asset_list = standardised_asset_list.drop(columns=["UPRN"])
+
+# We fill the project code for historical completions
+# A property counts as historical when it has no project code but does have a hubspot status other than
+# "READY TO BE SCHEDULED"; properties with no hubspot status at all are left without a project code
+standardised_asset_list["project_code"] = np.where(
+    pd.isnull(standardised_asset_list["project_code"]) & (
+        standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED"
+    ) & (
+        ~pd.isnull(standardised_asset_list["hubspot_status"])
+    ),
+    "THRIVE-HISTORICAL",
+    standardised_asset_list["project_code"]
+)
+
+# Store as an excel
+filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - "
+            "reconciled.xlsx")
+# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
+# Other tabs:
+block_analysis = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Block Analysis",
+)
+outcomes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Outcomes",
+)
+unmatched_submissions = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Unmatched Submissions",
+)
+unmatched_ecosurv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Unmatched Ecosurv",
+)
+
+with pd.ExcelWriter(filename) as writer:
+    standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+    block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False)
+    # If we have outcomes, we add a tab with the outcomes
+    outcomes.to_excel(writer, sheet_name="Outcomes", index=False)
+    unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
+    unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
+
+# A check, just comparing against the master tracker to make sure I have all of the installs
+# Both files are re-read from disk, so this check does not depend on the variables built above
+asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Standardised Asset List",
+)
+
+master_tracker = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
+    "Warmfront).xlsx",
+    sheet_name="Master Tracker",
+    header=1
+)
+
+# Inner join: only properties that are in the asset list and also have a completion date in the tracker
+df = asset_list[["landlord_property_id", "hubspot_status"]].merge(
+    master_tracker[~pd.isnull(master_tracker['Date Completed'])][["UPRN", "Date Completed"]],
+    how="inner",
+    left_on="landlord_property_id",
+    right_on="UPRN"
+)
+
+# These two lines are bare expressions: they only display a result in an interactive session and print nothing
+# when the file is run as a script. The second lists properties the tracker has a completion date for but whose
+# hubspot status is still "SUBMITTED TO INSTALLER".
+df["hubspot_status"].value_counts()
+df[df["hubspot_status"] == "SUBMITTED TO INSTALLER"]
--- a/recommendations/tests/test_lighting_recommendations.py
+++ b/recommendations/tests/test_lighting_recommendations.py
@ -49,6 +49,6 @@ class TestLightingRecommendations:
                                                                                                    'lighting in all '
                                                                                                    'fixed outlets',
                                                                            'low-energy-lighting': 100},
-             'total': 240.24, 'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3,
-             'preliminaries': 14.3, 'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4,
-             'labour_cost': 63.0, 'survey': False}]
+             'total': 188.76000000000002, 'subtotal': 157.3, 'vat': 31.460000000000004, 'contingency': 14.3,
+             'material': 80.0, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0, 'survey': False}
+        ]