adding matching from submissions sheet to asset list

2026-07-27 23:35:01 +00:00 · 2025-04-13 21:39:35 +01:00 · 2025-04-13 21:39:35 +01:00 · 3cfe938e27
commit 3cfe938e27
parent 2d71ad25ef
16 changed files with 509 additions and 45 deletions
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@ -7,7 +7,7 @@
      <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
    </content>
-    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyNamespacePackagesService">
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -3,7 +3,7 @@
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
  <component name="PyCharmProfessionalAdvertiser">
    <option name="shown" value="true" />
  </component>
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@ -4,6 +4,8 @@ import re
 import tiktoken
 from pprint import pprint
 from datetime import datetime
+
+from docutils.utils.math.tex2mathml_extern import blahtexml
 from openai import OpenAI
 import numpy as np
 import pandas as pd
@ -663,7 +665,10 @@ class AssetList:
            non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN)

        if self.old_format_non_intrusives_present:
-            non_intrusive_columns = self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES
+            # Keep only the old-format columns that are present; the ECO Eligibility column may be missing
+            non_intrusive_columns = [
+                c for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES if c in self.standardised_asset_list.columns
+            ]

        self.keep_variables += non_intrusive_columns

@ -731,7 +736,7 @@ class AssetList:
                        'PIMSS EMPTY'
                    ]

-                    if pd.isnull(date_str) or date_str in known_errors:
+                    if pd.isnull(date_str) or date_str in known_errors or (date_str == 0):
                        return None

                    if isinstance(date_str, str):
@ -752,6 +757,10 @@ class AssetList:
                    if isinstance(date_str, datetime):
                        return date_str.year

+                    if isinstance(date_str, float):
+                        if str(int(date_str)).isdigit() & (len(str(int(date_str))) == 4):
+                            return int(date_str)
+
                    # Check if date_str is a year itself
                    if str(date_str).isdigit() & (len(str(date_str)) == 4):
                        return int(date_str)
@ -1325,7 +1334,7 @@ class AssetList:
        )
        self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] = (
            self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
-                ["electric storage heaters", "room heaters", "electric radiators"]
+                ["electric storage heaters", "room heaters", "electric radiators", "no heating"]
            )
        )

@ -2099,6 +2108,9 @@ class AssetList:
        nomatch = []
        for _, x in tqdm(self.outcomes.iterrows(), total=len(self.outcomes)):

+            if pd.isnull(x[outcomes_address]):
+                continue
+
            # Check if we have an id
            oid = x[outcomes_id] if outcomes_id is not None else None

@ -2120,6 +2132,8 @@ class AssetList:

            address_clean = x[outcomes_address].lower().replace(",", "").replace("  ", " ")

+            self.outcomes["Outcome"] = self.outcomes["Outcome"].str.lower()
+
            matched = self.standardised_asset_list[
                (self.standardised_asset_list[
                     self.STANDARD_FULL_ADDRESS
@ -2140,7 +2154,9 @@ class AssetList:
            ].copy()
            if not matched.empty:
                matched["houseno"] = matched.apply(
-                    lambda x: SearchEpc.get_house_number(x[self.STANDARD_ADDRESS_1], x[self.STANDARD_POSTCODE]),
+                    lambda x: SearchEpc.get_house_number(
+                        str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
+                    ),
                    axis=1
                )

@ -2155,8 +2171,6 @@ class AssetList:
                        }
                    )
                    continue
-                elif matched.shape[0] > 1:
-                    raise NotImplementedError("Check me")
                elif not matched.empty:
                    # Use levenstein distance to match
                    matched["address"] = matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE]
@ -2254,19 +2268,123 @@ class AssetList:
                "SUBMISSION DATE" if "SUBMISSION DATE" in master_data.columns else "SUBMISSION DATE TO INSTALLERS"
            )

-            # We just need to check if any were cancelled
-            master_to_append = master_data[
-                ["UPRN", install_col, submission_col]
-            ].rename(
+            if "UPRN" in master_data.columns:
+                # We just need to check if any were cancelled
+                master_to_append = master_data[
+                    ["UPRN", install_col, submission_col]
+                ].rename(
+                    columns={
+                        "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
+                        install_col: "survey_status",
+                        submission_col: "submission_date"
+                    }
+                )
+                master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
+
+                master_surveyed.append(master_to_append)
+                continue
+
+            master_data["row_id"] = master_data.index
+
+            self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply(
+                lambda x: SearchEpc.get_house_number(
+                    str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE])
+                ),
+                axis=1
+            )
+
+            # Otherwise, we need to match algorithmically
+            logger.info("Matching master data to asset list")
+            matched = []
+            unmatched = []
+            for _, row in tqdm(master_data.iterrows(), total=len(master_data)):
+                if pd.isnull(row["POSTCODE"]):
+                    continue
+                postcode_no_space = row["POSTCODE"].strip().replace(" ", "").lower()
+
+                df = self.standardised_asset_list[
+                    (
+                        self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip().str.lower().str.replace(" ",
+                                                                                                                 "")
+                        == postcode_no_space
+                    )
+                ]
+
+                house_no = row["NO"]
+
+                if house_no in df["house_no"].values:
+                    df = df[df["house_no"] == house_no]
+                    if df.shape[0] != 1:
+                        # Several candidates share this house number; narrow by street / block name first
+
+                        if any(df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"])):
+                            df = df[
+                                df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"])
+                            ]
+                        else:
+                            # Levenshtein distance
+                            df = df[
+                                df[self.STANDARD_FULL_ADDRESS].str.lower().apply(
+                                    lambda x: process.extractOne(
+                                        " ".join([row["NO"], row["Street / Block Name"], row["TOWN"]]).lower(),
+                                        x
+                                    )[1]
+                                ) > 90
+                                ]
+
+                            if df.shape[0] == 0:
+                                unmatched.append(row["row_id"])
+                                continue
+
+                        if any(df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
+                            " ".join([row["NO"], row["Street / Block Name"]]).lower()
+                        )):
+                            df = df[
+                                df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(
+                                    " ".join([row["NO"], row["Street / Block Name"]]).lower()
+                                )
+                            ]
+
+                        if any(
+                            df[self.STANDARD_PROPERTY_TYPE].str.contains(
+                                row["PROPERTY TYPE      As per table emailed"].split(" ")[-1].lower()
+                            )
+                        ):
+                            # We ignore "block of flats" entries
+                            df = df[
+                                df[self.STANDARD_PROPERTY_TYPE].str.contains(
+                                    row["PROPERTY TYPE      As per table emailed"].split(" ")[-1].lower()
+                                ) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats")
+                                ]
+
+                        if df.shape[0] != 1:
+                            # We have multiple matches
+                            raise NotImplementedError("FIX ME")
+                    matched.append(
+                        {
+                            "row_id": row["row_id"],
+                            self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0],
+                        }
+                    )
+
+            self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no")
+
+            # We match the "UPRN", which is the landlord's ID, onto the master sheet
+            matched = pd.DataFrame(matched)
+            master_to_append = master_data[["row_id", install_col, submission_col]].merge(
+                matched, how="left", on="row_id"
+            ).rename(
                columns={
-                    "UPRN": self.STANDARD_LANDLORD_PROPERTY_ID,
                    install_col: "survey_status",
                    submission_col: "submission_date"
                }
            )
            master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
-
            master_surveyed.append(master_to_append)
+            unmatched_df = master_data[
+                master_data["row_id"].isin(unmatched)
+            ]
+            submissions_unmatched.append(unmatched_df)

        master_surveyed = pd.concat(master_surveyed)
        master_surveyed = master_surveyed[~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID])]
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -89,6 +89,42 @@ def app():
    # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
    # - Or the insulation required is loft/cavity (floors should be solid)

+    # Bromford
+    data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
+                   "Rebuild/Prepared data/")
+    data_filename = "asset_list.xlsx"
+    sheet_name = "Sheet1"
+    postcode_column = 'PostCode'
+    fulladdress_column = "FullAddress"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    address_cols_to_concat = []
+    missing_postcodes_method = None
+    landlord_year_built = "ConYear"
+    landlord_os_uprn = None
+    landlord_property_type = "AssetTypeDesc"
+    landlord_built_form = "PropTypeDesc"
+    landlord_wall_construction = "Construction type"
+    landlord_roof_construction = None
+    landlord_heating_system = "Heating Type"
+    landlord_existing_pv = None
+    landlord_property_id = "Asset"
+    landlord_sap = None
+    outcomes_filename = "outcomes.xlsx"
+    outcomes_sheetname = "Sheet1"
+    outcomes_postcode = "Postcode"
+    outcomes_houseno = "No"
+    outcomes_id = None
+    outcomes_address = "Address"
+    master_filepaths = [
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
+        "3 submissions.csv",
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
+        "4 submissions.csv",
+    ]
+    master_to_asset_list_filepath = None
+    phase = False
+
    # Torus
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
    data_filename = "Torus Property Asset List - Phase 1.xlsx"
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@ -107,5 +107,42 @@ BUILT_FORM_MAPPINGS = {
    'Semi-detached': 'semi-detached',
    'Detached': 'detached',
    'Flat / maisonette': 'unknown',
-    '2014 onwards': 'unknown'
+    '2014 onwards': 'unknown',
+
+    'Semi Detached': 'semi-detached',
+    'End Terraced': 'end-terrace',
+    'Basement': 'basement',
+    'No': 'unknown',
+    'Mid Terrace': 'mid-terrace',
+    'Link Detached': 'detached',
+    'Mid Terraced': 'mid-terrace',
+    'Ground Floor': 'ground floor',
+    'End Terrace': 'end-terrace',
+    'Sheltrd Semi Det': 'semi-detached',
+    'Shop': 'unknown',
+    'Fourth Floor': 'mid-floor',
+    'Terraced': 'mid-terrace',
+    'Leasehold Terr': 'mid-terrace',
+    'Room': 'unknown',
+    'Second Floor': 'mid-floor',
+    'Third Floor': 'mid-floor',
+    'Office': 'unknown',
+    'First Floor Over Arch': 'ground floor',
+    '16-25 IND-PPL': 'unknown',
+    'Seventh Floor': 'top-floor',
+    'Sheltered': 'unknown',
+    'Shelt Bung End': 'end-terrace',
+    'Room In Shared Accommodation': 'unknown',
+    'Sheltred Bung Terrace': 'mid-terrace',
+    'Garage In Block': 'unknown',
+    'First Floor': 'ground floor',
+    'First Floor Over Garage': 'ground floor',
+    'Leasehold': 'unknown',
+    'Sheltred Bung': 'unknown',
+    'Garage': 'unknown',
+    'Sixth Floor': 'top-floor',
+    'Sheltered Bung': 'semi-detached',
+    'Guest': 'unknown',
+    'Fifth Floor': 'mid-floor'
+
 }
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@ -23,7 +23,8 @@ STANDARD_HEATING_SYSTEMS = {
    'gas combi boiler',
    'unknown',
    "electric ceiling",
-    "electric underfloor"
+    "electric underfloor",
+    "no heating"
 }

 HEATING_MAPPINGS = {
@ -87,7 +88,7 @@ HEATING_MAPPINGS = {
    'Heat pump (air) Electricity': 'air source heat pump',
    'Room heaters Electricity': 'electric radiators',
    'Room heaters Oil': 'room heaters',
-    'No heating system ND': 'unknown',
+    'No heating system ND': 'no heating',
    'Heat pump (wet) Electricity': 'ground source heat pump',
    'Room heaters Biomass': 'room heaters',
    'ND Solid fuel': 'unknown',
@ -98,11 +99,11 @@ HEATING_MAPPINGS = {
    'Storage heating Electricity': 'electric storage heaters',
    'ND Electricity': 'unknown',
    'Community heating Community (non-gas)': 'district heating',
-    'No heating system N/A': 'unknown',
+    'No heating system N/A': 'no heating',
    'Boiler Solid fuel': 'boiler - other fuel',
    'Community heating Community (mains gas)': 'communal gas boiler',
    'Boiler Biomass': 'boiler - other fuel',
-    'No heating system Mains gas': 'unknown',
+    'No heating system Mains gas': 'no heating',

    'Storage heaters': 'electric storage heaters',
    'Air Source': 'air source heat pump',
@ -170,5 +171,36 @@ HEATING_MAPPINGS = {
    'Heat pump (wet)': 'air source heat pump',
    'Electric ceiling heating': 'electric ceiling',
    'Electric under floor heating': 'electric underfloor',
-    'Community heating': 'district heating'
+    'Community heating': 'district heating',
+
+    'Wet - Radiators Air Source Heat Pump': 'air source heat pump',
+    'Wet - Radiators Electric': 'electric boiler',
+    'Storage Heaters': 'high heat retention storage heaters',
+    'Wet - Radiators Oil': 'oil boiler',
+    'Communal Wet - Radiators Gas': 'communal gas boiler',
+    'Electric - Storage/Panel Heaters Electric': 'electric storage heaters',
+    'Gas Central Heating': 'gas combi boiler',
+    'Wet - Radiators Solar': 'other',
+    'Electric - Storage/Panel Heaters LPG': 'electric storage heaters',
+    'No Heating Solid': 'no heating',
+    'Wet - Underfloor Gas': 'gas condensing boiler',
+    'No Heating Electric': 'no heating',
+    'Oil Fired Central Heating': 'oil boiler',
+    'Warm Air Gas': 'other',
+    'Communal Boilers': 'communal gas boiler',
+    'Wet - Radiators Gas': 'gas combi boiler',
+    'Wet - Radiators Solid': 'solid fuel',
+    'Wet - Radiators LPG': 'other',
+    'No Heating Gas': 'no heating',
+    'No Heating': 'no heating',
+    'Panel Heaters': 'electric radiators',
+    'Rointe Electric Heating': 'electric storage heaters',
+    'Underfloor Heating': 'electric underfloor',
+    'Air Source Heating': 'air source heat pump',
+    'Warm Air Electric': 'other',
+    'Communal Wet - Radiators Electric': 'communal gas boiler',
+    'Wet - Underfloor Solar': 'other',
+    'No Heating Required Gas': 'unknown',
+    'Electric - Storage/Panel Heaters Gas': 'electric storage heaters',
+    'Electric - Storage/Panel Heaters Solid': 'electric storage heaters'
 }
--- a/asset_list/mappings/property_type.py
+++ b/asset_list/mappings/property_type.py
@ -151,5 +151,32 @@ PROPERTY_MAPPING = {
    'Flat: Enclosed End Terrace: Mid Floor': 'flat',
    'Flat: Enclosed End Terrace: Ground Floor': 'flat',
    'Flat: Enclosed Mid Terrace: Top Floor': 'flat',
-    '2013 onwards': 'unknown'
+    '2013 onwards': 'unknown',
+
+    'House 2 Storey': 'house',
+    'Bung': 'bungalow',
+    'House 3 Storey': 'house',
+    'Shared Flat': 'flat',
+    'd': 'unknown',
+    'Mais': 'maisonette',
+    'e': 'unknown',
+    'Shared House': 'house',
+    'House 4 Storey': 'house',
+    'Shared Bungalow': 'bungalow',
+    'Detch': 'house',
+    'Shop': 'other',
+    'Terr': 'house',
+    'Terrace': 'house',
+    'Description': 'unknown',
+    'Hse': 'house',
+    'Room': 'other',
+    'Office': 'other',
+    'Room In Shared Accommodation': 'other',
+    'Apartment': 'flat',
+    'm': 'unknown',
+    'Garage': 'other',
+    'Parking Space': 'other',
+    'Community Centre': 'other',
+    'Communal Facility': 'other',
+    'Semi': 'house'
 }
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@ -22,5 +22,6 @@ ROOF_CONSTRUCTION_MAPPINGS = {
    'ND (inferred)': 'unknown',
    '2018 onwards': 'unknown',
    'Pitched (vaulted ceiling)': 'pitched insulated',
-    np.nan: "unknown"
+    np.nan: "unknown",
+    None: "unknown"
 }
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@ -157,5 +157,14 @@ WALL_CONSTRUCTION_MAPPINGS = {
    'Timber frame': 'timber frame unknown insulation',
    '2017 onwards': 'new build - average thermal transmittance',
    'ND (inferred)': 'unknown',
-    'Flat / maisonette': 'other'
+    'Flat / maisonette': 'other',
+
+    'Other': 'other',
+    'Timber Frame': 'timber frame unknown insulation',
+    'Cavity Wall': 'cavity unknown insulation',
+    'Non-Traditional': 'system built',
+    'PRC': 'system built',
+    'Cross Wall': 'system built',
+    'Solid Wall': 'solid brick unknown insulation',
+    'Traditional': 'other'
 }
--- a/backend/Property.py
+++ b/backend/Property.py
@ -107,7 +107,10 @@ class Property:
        # cost and instead, provide a message that the measure has already been installed

        self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
-        self.non_invasive_recommendations = non_invasive_recommendations
+        self.non_invasive_recommendations = (
+            non_invasive_recommendations['recommendations'] if
+            non_invasive_recommendations else []
+        )
        # This is a list of measures that have been recommended for the property
        if isinstance(measures, list):
            self.measures = measures
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@ -83,7 +83,8 @@ class PlanTriggerRequest(BaseModel):
    exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
    inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
    # This is a list of measures that we want to be included, if they are options
-    required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
+    # Default to empty
+    required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=[], min_length=1)

    scenario_name: Optional[str] = ""
    multi_plan: Optional[bool] = False
--- a/etl/customers/bromford/data_cleanup.py
+++ b/etl/customers/bromford/data_cleanup.py
@ -0,0 +1,192 @@
+"""
+12th April 2025
+This script attempts to clean up the various pieces of data we have for Bromford, with the intention of producing a
+standardised asset list
+"""
+
+import pandas as pd
+
+# Step 1
+# The inspections data is spread across three different files. We attempt to produce one finalised asset list, with
+# comprehensive inspections
+
+# Primary asset list
+asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford Asset "
+    "List.xlsx",
+    sheet_name="Asset List"
+)
+
+#
+inspections_1 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
+    "MDS.xlsx",
+    sheet_name="Data list"
+)
+inspections_1["Heating Type"] = (inspections_1["Heating Type"] + " " + inspections_1["Heating fuel"]).str.strip()
+
+inspections_2 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
+    "MERLIN LANE.xlsx",
+    sheet_name="Report"
+)
+inspections_2["AssetTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[-1]
+inspections_2["PropTypeDesc"] = inspections_2["PropertyType"].str.split(" ").str[:-1].str.join(" ")
+
+inspections_3 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
+    "SEVERN VALE - KLARKE.xlsx",
+    sheet_name="Asset report"
+)
+
+inspections_3["FullAddress"] = inspections_3["T1_Address1"] + ", " + inspections_3["T1_Address2"]
+
+# On inspections 3, we have multiple sheets which describe the heating
+heating_systems = []
+for sheet_name in [
+    "Storage Heaters", "No Heating", "Underfloor Heating", "Rointe Electric Heating", "Air Source Heating",
+    "Gas Central Heating", "Electric Boiler", "Oil Fired Central Heating",
+    "Communal Boilers", "Panel Heaters"
+]:
+    df = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
+        "Rebuild/Inspections/BROMFORD "
+        "SEVERN VALE - KLARKE.xlsx",
+        sheet_name=sheet_name
+    )
+    df = df[["UPRN"]]
+    df["Heating Type"] = sheet_name
+    heating_systems.append(df)
+
+heating_systems = pd.concat(heating_systems)
+# Some UPRNs appear on more than one heating sheet and we cannot tell which is correct, so we keep the first
+heating_systems = heating_systems.drop_duplicates("UPRN")
+heating_systems = heating_systems.rename(columns={"UPRN": "Asset"})
+heating_systems["Asset"] = heating_systems["Asset"].astype(int)
+
+inspections_3 = inspections_3.merge(heating_systems, how="left", on="Asset")
+
+# Create a consolidated inspections sheet
+inspections = pd.concat(
+    [
+        inspections_1[["Asset", "Construction type", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
+        inspections_2[["Asset", "Construction type", "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
+        inspections_3[["Asset", 'Heating Type', "WFT Findings", "Eligibility (Red/Yellow/Green)"]],
+    ]
+)
+
+inspections_address_data = pd.concat(
+    [
+        inspections_1[
+            ["Asset", "FullAddress", "PostCode", "ConYear", "Beds", "AssetTypeDesc", "PropTypeDesc", 'ManAreaDesc', ]
+        ],
+        inspections_2[
+            ['Asset', 'FullAddress', 'AccomType', "AssetTypeDesc", "PropTypeDesc", 'ConYear', 'Postcode']
+        ].rename(columns={"Postcode": "PostCode"}),
+        inspections_3[
+            ['Asset', "FullAddress", 'T1_Postcode', 'T1_Build Year', 'T1_AssetType']
+        ].rename(
+            columns={"T1_Postcode": "PostCode", "T1_Build Year": "ConYear", "T1_AssetType": "AssetTypeDesc"}
+        ),
+    ]
+)
+
+# Remove some error values
+inspections = inspections[~inspections["Asset"].isin(
+    [
+        "They're all green partial fill they're all green this",
+        "South Staffordshire District Council",
+        'Blk Milton Crt F9-10, Perton, Wolverhampton'
+    ]
+)]
+
+inspections["Asset"] = inspections["Asset"].astype(str)
+asset_list["Asset"] = asset_list["Asset"].astype(str)
+inspections_address_data["Asset"] = inspections_address_data["Asset"].astype(str)
+inspections['WFT Findings'] = inspections['WFT Findings'].replace(r'^\s*$', pd.NA, regex=True)
+
+# We have some cases where the inspections data has dupes on Asset (the ID column). We take the instance that is
+# populated
+inspections = inspections.sort_values(by='WFT Findings', na_position='last')
+inspections = inspections.drop_duplicates(subset='Asset', keep='first')
+
+# We have dupes in the asset list
+asset_list = asset_list.drop_duplicates("Asset")
+
+# Merge on
+missed_asset_ids = inspections[
+    ~inspections["Asset"].isin(asset_list["Asset"].values)
+]["Asset"].values
+
+missed_assets = inspections_address_data[
+    inspections_address_data["Asset"].isin(missed_asset_ids)
+]
+missed_assets = missed_assets.drop_duplicates("Asset")
+
+# We produce a larger asset list
+asset_list = pd.concat([asset_list, missed_assets])
+
+asset_list = asset_list.merge(
+    inspections, how="left", on="Asset"
+)
+asset_list["WFT Findings"] = asset_list["WFT Findings"].fillna("No Inspections Note")
+
+# Store
+# asset_list.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
+#     "data/asset_list.xlsx"
+# )
+
+# We now prepare outcomes into a single file
+pv_outcomes = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford PV "
+    "Outcomes.csv",
+    encoding='cp1252'
+)
+pv_outcomes["measure_type"] = "solar"
+
+other_outcomes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/(Bromford) "
+    "15.04.2024.xlsx",
+    sheet_name="ECO4 & GBIS",
+    header=1
+)
+other_outcomes["measure_type"] = "cwi"
+
+combined_outcomes = pd.concat(
+    [
+        other_outcomes[["NO", "ADDRESS", "POSTCODE", "WEEK COMMENCING", "OUTCOMES", "NOTES"]].rename(
+            columns={
+                "NO": "No", "ADDRESS": "Address", "POSTCODE": "Postcode", "WEEK COMMENCING": "Week Commencing",
+                "OUTCOMES": "Outcome", "NOTES": "Notes"
+            }
+        ),
+        pv_outcomes[['No', 'Address', 'Postcode', "Week Commencing", "Outcome", "Notes"]]
+    ]
+)
+
+# Store
+# combined_outcomes.to_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
+#     "data/outcomes.xlsx"
+# )
+
+# Submissions sheet -
+eco3_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 Submissions.csv",
+    encoding='cp1252'
+)
+# Get rid of the unnamed columns
+unnamed_columns = [c for c in eco3_submissions.columns if "Unnamed: " in c]
+eco3_submissions = eco3_submissions.drop(columns=unnamed_columns)
+# Store
+eco3_submissions.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 submissions.csv",
+    index=False
+)
+
+eco4_submissions = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 4 submissions.csv",
+)
+
+same_cols = [c for c in eco4_submissions.columns if c in eco3_submissions.columns]
--- a/etl/customers/remote_assessments/app.py
+++ b/etl/customers/remote_assessments/app.py
@ -4,7 +4,7 @@ from dotenv import load_dotenv
 from utils.s3 import save_csv_to_s3
 from etl.find_my_epc.AssetListEpcData import AssetListEpcData

-PORTFOLIO_ID = 140
+PORTFOLIO_ID = 141
 USER_ID = 8

 load_dotenv(dotenv_path="backend/.env")
@ -19,17 +19,20 @@ def app():

    asset_list = [
        {
-            "address": "Brow Cottage",
-            "postcode": "YO18 7PZ",
-            "uprn": 10007630752,
-            "property_type": "House",
-            "built_form": "Semi-Detached",
+            "address": "196 Merrow Street",
+            "postcode": "SE17 2NP",
+            "uprn": 200003423454,
            "patch": True
        },
        {
-            "address": "Wyburn",
-            "postcode": "DT1 2LL",
-            "uprn": 100040630290
+            "address": "65 Liverpool Grove",
+            "postcode": "SE17 2HP",
+            "uprn": 200003423194
+        },
+        {
+            "address": "2 Brettell Street",
+            "postcode": "SE17 2NZ",
+            "uprn": 200003423607
        },
    ]
    asset_list = pd.DataFrame(asset_list)
@ -71,12 +74,16 @@ def app():

    valuation_data = [
        {
-            "valuation": 469_000,
-            "uprn": 10007630752,
+            "valuation": 339_000,
+            "uprn": 200003423454,
        },
        {
-            "valuation": 373_000,
-            "uprn": 100040630290
+            "valuation": 374_000,
+            "uprn": 200003423194
+        },
+        {
+            "valuation": 719_000,
+            "uprn": 200003423607
        },
    ]
    # Store valuation data to s3
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@ -1,7 +1,7 @@
 import os
 import re
 import openpyxl
-import Levenshtein
+from fuzzywuzzy import fuzz
 from pathlib import Path
 import msgpack
 from datetime import datetime
@ -2771,7 +2771,8 @@ class DataLoader:
        match_to = [x.replace(" ", "") for x in match_to]

        # Perform matching between full key and match_to
-        distances = [Levenshtein.distance(matching_string, s) for s in match_to]
+        distances = [100 - fuzz.ratio(matching_string, s) for s in match_to]
+        
        best_match_index = distances.index(min(distances))
        # We might want to consider a threshold for the distance, however for the momeny,
        # we don't consider this for the moment
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@ -635,7 +635,7 @@ class Recommendations:
                    # By limiting here, we don't change the value in current_phase_values. This means that the
                    # future recommendations won't have an impact that is too large
                    li_sap_limit = RoofRecommendations.get_loft_insulation_sap_limit(
-                        property_instance.data["roof-energy-eff"], property_instance.data["extension-count"]
+                        property_instance.data["roof-energy-eff"], property_instance.roof["insulation_thickness"]
                    )
                    if li_sap_limit is not None:
                        property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit)
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@ -64,16 +64,16 @@ class RoofRecommendations:
        )

    @classmethod
-    def get_loft_insulation_sap_limit(cls, roof_energy_eff, extension_count):
+    def get_loft_insulation_sap_limit(cls, roof_energy_eff, existing_thickness):
        """
        Get the SAP limit for loft insulation
        :param roof_energy_eff:
        :return:
        """

-        if extension_count == 0:
-            # No limit
-            return None
+        if str(existing_thickness).isdigit():
+            if float(existing_thickness) >= 250:
+                return 0

        if roof_energy_eff in ["Good", "Very Good"]:
            return 1