diff --git a/.devcontainer/backend/Dockerfile b/.devcontainer/backend/Dockerfile
index 6a1cc120..59aa0cb6 100644
--- a/.devcontainer/backend/Dockerfile
+++ b/.devcontainer/backend/Dockerfile
@@ -64,4 +64,13 @@ RUN apt install -y wget gnupg2 lsb-release
 RUN echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
 RUN wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
 RUN apt update
-RUN apt install -y postgresql-14
\ No newline at end of file
+RUN apt install -y postgresql-14
+
+# Install Claude and the caveman plugin as ${USER}, then switch back to root
+USER ${USER}
+RUN curl -fsSL https://claude.ai/install.sh | bash \
+    && export PATH="/home/${USER}/.local/bin:${PATH}" \
+    && claude plugin marketplace add JuliusBrussee/caveman \
+    && claude plugin install caveman@caveman
+ENV PATH="/home/${USER}/.local/bin:${PATH}"
+USER root
\ No newline at end of file
diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py
index 71763790..a516a1df 100644
--- a/backend/app/db/models/uploaded_file.py
+++ b/backend/app/db/models/uploaded_file.py
@@ -16,6 +16,7 @@ class FileTypeEnum(enum.Enum):
     PAS_2023_OCCUPANCY = "pas_2023_occupancy"
     ECMK_SITE_NOTE = "ecmk_site_note"
     ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
+    ECMK_SURVEY_XML = "ecmk_survey_xml"
 
 
 class FileSourceEnum(enum.Enum):
@@ -37,6 +38,7 @@ class UploadedFile(Base):
     landlord_property_id = Column(Text, nullable=True)
     uprn = Column(BigInteger, nullable=True)
     hubspot_listing_id = Column(BigInteger, nullable=True)
+    hubspot_deal_id = Column(Text, nullable=True)
 
     file_type = Column(
         SqlEnum(
diff --git a/backend/ecmk_fetcher/excel_writer.py b/backend/ecmk_fetcher/excel_writer.py
new file mode 100644
index 00000000..f290614b
--- /dev/null
+++ b/backend/ecmk_fetcher/excel_writer.py
@@ -0,0 +1,53 @@
+import os
+from typing import Any
+
+from openpyxl import Workbook, load_workbook
+from openpyxl.worksheet.worksheet import Worksheet
+
+
+def write_row(file_path: str, row_data: dict[str, Any]) -> None:
+    """Append ``row_data`` as one row of the workbook at ``file_path``.
+
+    A missing file is created with ``row_data``'s keys as the header row. For an
+    existing file, keys absent from row 1 become new columns: each is inserted
+    before the next key of ``row_data`` that already has a column (keeping related
+    columns grouped), or appended after the last column when there is none.
+    """
+    new_keys = list(row_data)
+
+    if not os.path.exists(file_path):
+        wb = Workbook()
+        ws: Worksheet = wb.active  # type: ignore[assignment]
+        ws.append(new_keys)
+        ws.append(list(row_data.values()))
+        wb.save(file_path)
+        return
+
+    wb = load_workbook(file_path)
+    ws = wb.active  # type: ignore[assignment]
+
+    # Mirror row 1 position-for-position. Blank cells between headers are kept as
+    # None placeholders: dropping them would shift every later index, so columns
+    # would be inserted, and values written, under the wrong header.
+    headers: list[Any] = [cell.value for cell in ws[1]]
+    while headers and headers[-1] is None:  # trailing blanks (or empty sheet)
+        headers.pop()
+
+    for pos, key in enumerate(new_keys):
+        if key in headers:
+            continue
+
+        # First later key of this row that already has a column, if any.
+        insert_before = next((k for k in new_keys[pos + 1 :] if k in headers), None)
+
+        if insert_before is not None:
+            col_idx = headers.index(insert_before) + 1  # 1-based
+            ws.insert_cols(col_idx)  # shifts existing cells right with their header
+            ws.cell(row=1, column=col_idx, value=key)
+            headers.insert(col_idx - 1, key)
+        else:
+            headers.append(key)
+            ws.cell(row=1, column=len(headers), value=key)
+
+    ws.append([None if col is None else row_data.get(col) for col in headers])
+    wb.save(file_path)
diff --git a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py
index 2f122080..4f8c24ea 100644
--- a/backend/ecmk_fetcher/processor.py
+++ b/backend/ecmk_fetcher/processor.py
@@ -26,13 +26,17 @@ from backend.ecmk_fetcher.browser import (
 )
 from backend.ecmk_fetcher.reports import (
     REPORT_TYPES,
+    FileDownloadButtonType,
     build_property_id,
     map_report_type_to_db_file_type,
 )
+from backend.ecmk_fetcher.excel_writer import write_row
 from backend.ecmk_fetcher.upload import (
+    upload_excel_to_sharepoint,
     upload_file_to_s3_and_update_db,
     upload_file_to_sharepoint,
 )
+from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
 from utils.logger import setup_logger
 from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
 from utils.sharepoint.domna_sites import DomnaSites
@@ -59,6 +63,15 @@ def run_job() -> None:
     )
 
     sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
+    sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling"
+
+    DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
+    local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME)
+
+    sharepoint_client.download_file(
+        sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
+        local_path=local_dimensions_path,
+    )
 
     s3_bucket: str = "retrofit-energy-assessments-dev"
 
@@ -141,15 +154,30 @@ def run_job() -> None:
                             )
 
                             try:
-                                upload_file_to_sharepoint(
-                                    client=sharepoint_client,
-                                    file_path=file_path,
-                                    base_path=sharepoint_base_path,
-                                    subpath=sharepoint_address,
-                                )
-                                logger.info(
-                                    f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
-                                )
+                                if report_type == FileDownloadButtonType.RAW_XML.value:
+                                    with open(file_path, "r", encoding="utf-8") as f:
+                                        xml_string = f.read()
+                                    details = parse_rdsap(xml_string)
+                                    row_data = flatten_sap_property(details)
+                                    write_row(local_dimensions_path, row_data)
+                                    upload_excel_to_sharepoint(
+                                        client=sharepoint_client,
+                                        file_path=local_dimensions_path,
+                                        sharepoint_path=sharepoint_excel_path,
+                                    )
+                                    logger.info(
+                                        f"Written dimensions row and uploaded Dimensions.xlsx for {address}"
+                                    )
+                                else:
+                                    upload_file_to_sharepoint(
+                                        client=sharepoint_client,
+                                        file_path=file_path,
+                                        base_path=sharepoint_base_path,
+                                        subpath=sharepoint_address,
+                                    )
+                                    logger.info(
+                                        f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
+                                    )
 
                                 # Upload to s3 and update db
                                 upload_file_to_s3_and_update_db(
diff --git a/backend/ecmk_fetcher/reports.py b/backend/ecmk_fetcher/reports.py
index d8d11d50..d2f8ea52 100644
--- a/backend/ecmk_fetcher/reports.py
+++ b/backend/ecmk_fetcher/reports.py
@@ -14,6 +14,7 @@ class FileDownloadButtonType(Enum):
 REPORT_TYPES = [
     FileDownloadButtonType.ASSESSOR_HUB_SITENOTE_REPORT.value,
     FileDownloadButtonType.SITENOTE_REPORT.value,
+    FileDownloadButtonType.RAW_XML.value,
 ]
 
 
@@ -23,6 +24,8 @@ def map_report_type_to_db_file_type(report_type: int) -> FileTypeEnum:
             return FileTypeEnum.ECMK_SITE_NOTE
         case FileDownloadButtonType.SITENOTE_REPORT.value:
             return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE
+        case FileDownloadButtonType.RAW_XML.value:
+            return FileTypeEnum.ECMK_SURVEY_XML
         case _:
             raise ValueError("Unknown report type")
 
diff --git a/backend/ecmk_fetcher/tests/test_excel_writer.py b/backend/ecmk_fetcher/tests/test_excel_writer.py
new file mode 100644
index 00000000..3f730951
--- /dev/null
+++ b/backend/ecmk_fetcher/tests/test_excel_writer.py
@@ -0,0 +1,123 @@
+import os
+import pathlib
+import pytest
+from openpyxl import load_workbook
+from openpyxl.worksheet.worksheet import Worksheet
+
+from backend.ecmk_fetcher.excel_writer import write_row
+
+
+@pytest.fixture
+def xlsx_path(tmp_path: pathlib.Path) -> str:  # tmp_path is pytest's built-in fixture
+    return os.path.join(tmp_path, "output.xlsx")  # path only; no file is created here
+
+
+def _active_sheet(file_path: str) -> Worksheet:  # reloads the workbook from disk
+    sheet = load_workbook(file_path).active
+    assert isinstance(sheet, Worksheet)  # fail fast if the active sheet is another type
+    return sheet
+
+
+def test_write_row_creates_file(xlsx_path: str):
+    """A missing workbook is created with a header row followed by the data row."""
+    record = {
+        "address": "1 Fake Avenue, AB24 5CD",
+        "property_type": "House",
+        "main_dwelling_floor_1_area_m2": 43.61,
+    }
+
+    # act
+    write_row(xlsx_path, record)
+
+    # assert
+    assert os.path.exists(xlsx_path)
+    sheet = _active_sheet(xlsx_path)
+    assert [cell.value for cell in sheet[1]] == list(record)
+    assert [cell.value for cell in sheet[2]] == list(record.values())
+
+
+def test_write_row_appends_to_existing(xlsx_path: str):
+    """A second call appends a data row and leaves the header row unchanged."""
+    first = {
+        "address": "1 Fake Avenue, AB24 5CD",
+        "property_type": "House",
+        "main_dwelling_floor_1_area_m2": 43.61,
+    }
+    second = {
+        "address": "2 Other Street, XY1 2AB",
+        "property_type": "Flat",
+        "main_dwelling_floor_1_area_m2": 30.0,
+    }
+
+    # act
+    for record in (first, second):
+        write_row(xlsx_path, record)
+
+    # assert
+    sheet = _active_sheet(xlsx_path)
+    assert sheet.max_row == 3  # 1 header + 2 data rows
+    assert [cell.value for cell in sheet[1]] == list(first)
+    assert [cell.value for cell in sheet[2]] == list(first.values())
+    assert [cell.value for cell in sheet[3]] == list(second.values())
+
+
+def test_write_row_inserts_new_columns_at_logical_positions(xlsx_path: str):
+    """Columns first seen in a later row are inserted beside related columns.
+
+    The first row has floor 1 and roof columns; the second also has floor 2,
+    which must land between floor 1 and roof instead of at the end.
+    """
+    first = {
+        "address": "1 Fake Avenue, AB24 5CD",
+        "property_type": "House",
+        "main_dwelling_floor_1_area_m2": 43.61,
+        "main_dwelling_floor_1_height_m": 2.46,
+        "main_dwelling_roof_construction": 4,
+    }
+    second = {
+        "address": "2 Other Street, XY1 2AB",
+        "property_type": "House",
+        "main_dwelling_floor_1_area_m2": 50.0,
+        "main_dwelling_floor_1_height_m": 2.5,
+        "main_dwelling_floor_2_area_m2": 48.0,
+        "main_dwelling_floor_2_height_m": 2.4,
+        "main_dwelling_roof_construction": 4,
+    }
+
+    # act
+    for record in (first, second):
+        write_row(xlsx_path, record)
+
+    # assert
+    sheet = _active_sheet(xlsx_path)
+    header, old_row, new_row = ([c.value for c in sheet[n]] for n in (1, 2, 3))
+    assert header == [
+        "address",
+        "property_type",
+        "main_dwelling_floor_1_area_m2",
+        "main_dwelling_floor_1_height_m",
+        "main_dwelling_floor_2_area_m2",  # inserted before roof, not at end
+        "main_dwelling_floor_2_height_m",
+        "main_dwelling_roof_construction",
+    ]
+
+    # the first row had no floor_2 data, so those cells should be empty
+    assert old_row == [
+        "1 Fake Avenue, AB24 5CD",
+        "House",
+        43.61,
+        2.46,
+        None,  # main_dwelling_floor_2_area_m2
+        None,  # main_dwelling_floor_2_height_m
+        4,
+    ]
+
+    assert new_row == [  # the second row is fully populated
+        "2 Other Street, XY1 2AB",
+        "House",
+        50.0,
+        2.5,
+        48.0,
+        2.4,
+        4,
+    ]
diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py
new file mode 100644
index 00000000..3695b09d
--- /dev/null
+++ b/backend/ecmk_fetcher/tests/test_xml_processor.py
@@ -0,0 +1,329 @@
+from backend.ecmk_fetcher.xml_processor import (
+    SapPropertyDetails,
+    flatten_sap_property,
+    parse_rdsap,
+)
+
+
+SAMPLE_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
+<Report-Header>
+    <Property>
+        <Address>
+            <Address-Line-1>1</Address-Line-1>
+            <Address-Line-2>Fake Avenue</Address-Line-2>
+            <Post-Town>Random</Post-Town>
+            <Postcode>AB24 5CD</Postcode>
+        </Address>
+    </Property>
+</Report-Header>
+
+<SAP-Data>
+    <SAP-Property-Details>
+        <Property-Type>0</Property-Type>
+
+        <SAP-Building-Parts>
+
+            <SAP-Building-Part>
+                <Building-Part-Number>1</Building-Part-Number>
+                <Identifier>Main Dwelling</Identifier>
+                <Construction-Age-Band>C</Construction-Age-Band>
+                <Floor-Heat-Loss>7</Floor-Heat-Loss>
+
+                <Roof-Construction>4</Roof-Construction>
+                <Roof-Insulation-Location>2</Roof-Insulation-Location>
+                <Roof-Insulation-Thickness>100mm</Roof-Insulation-Thickness>
+
+                <Wall-Construction>4</Wall-Construction>
+                <Wall-Insulation-Type>4</Wall-Insulation-Type>
+
+                <SAP-Floor-Dimensions>
+                    <SAP-Floor-Dimension>
+                        <Heat-Loss-Perimeter quantity="metres">25.31</Heat-Loss-Perimeter>
+                        <Room-Height quantity="metres">2.46</Room-Height>
+                        <Total-Floor-Area quantity="square metres">43.61</Total-Floor-Area>
+                        <Floor>0</Floor>
+                        <Party-Wall-Length>0</Party-Wall-Length>
+                    </SAP-Floor-Dimension>
+
+                    <SAP-Floor-Dimension>
+                        <Heat-Loss-Perimeter quantity="metres">26.16</Heat-Loss-Perimeter>
+                        <Room-Height quantity="metres">2.44</Room-Height>
+                        <Total-Floor-Area quantity="square metres">42.33</Total-Floor-Area>
+                        <Floor>1</Floor>
+                        <Party-Wall-Length>0</Party-Wall-Length>
+                    </SAP-Floor-Dimension>
+                </SAP-Floor-Dimensions>
+
+            </SAP-Building-Part>
+
+            <SAP-Building-Part>
+                <Building-Part-Number>2</Building-Part-Number>
+                <Identifier>Extension</Identifier>
+                <Construction-Age-Band>C</Construction-Age-Band>
+
+                <Roof-Construction>8</Roof-Construction>
+                <Roof-Insulation-Location>7</Roof-Insulation-Location>
+                <Sloping-Ceiling-Insulation-Thickness>AB</Sloping-Ceiling-Insulation-Thickness>
+
+                <Wall-Construction>3</Wall-Construction>
+                <Wall-Insulation-Type>4</Wall-Insulation-Type>
+
+                <SAP-Floor-Dimensions>
+                    <SAP-Floor-Dimension>
+                        <Heat-Loss-Perimeter quantity="metres">6.85</Heat-Loss-Perimeter>
+                        <Room-Height quantity="metres">2.24</Room-Height>
+                        <Total-Floor-Area quantity="square metres">4.46</Total-Floor-Area>
+                        <Floor>0</Floor>
+                        <Party-Wall-Length>0</Party-Wall-Length>
+                    </SAP-Floor-Dimension>
+                </SAP-Floor-Dimensions>
+
+            </SAP-Building-Part>
+
+        </SAP-Building-Parts>
+
+    </SAP-Property-Details>
+</SAP-Data>
+</RdSAP-Report>
+"""
+
+
+NO_ROOF_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
+<Report-Header>
+    <Property>
+        <Address>
+            <Address-Line-1>5</Address-Line-1>
+            <Post-Town>Somewhere</Post-Town>
+            <Postcode>XY1 2AB</Postcode>
+        </Address>
+    </Property>
+</Report-Header>
+<SAP-Data>
+    <SAP-Property-Details>
+        <Property-Type>0</Property-Type>
+        <SAP-Building-Parts>
+            <SAP-Building-Part>
+                <Identifier>Main Dwelling</Identifier>
+                <SAP-Floor-Dimensions>
+                    <SAP-Floor-Dimension>
+                        <Heat-Loss-Perimeter quantity="metres">10.0</Heat-Loss-Perimeter>
+                        <Room-Height quantity="metres">2.5</Room-Height>
+                        <Total-Floor-Area quantity="square metres">50.0</Total-Floor-Area>
+                        <Floor>0</Floor>
+                        <Party-Wall-Length>3.0</Party-Wall-Length>
+                    </SAP-Floor-Dimension>
+                </SAP-Floor-Dimensions>
+            </SAP-Building-Part>
+        </SAP-Building-Parts>
+    </SAP-Property-Details>
+</SAP-Data>
+</RdSAP-Report>
+"""
+
+
+def test_parse_rdsap_contract():
+    """The two-part SAMPLE_XML parses into the full nested structure."""
+    parsed: SapPropertyDetails = parse_rdsap(SAMPLE_XML)
+
+    expected = {
+        "reference": "1AB245CD",
+        "address": "1, Fake Avenue, Random, AB24 5CD",
+        "property_type": "House",
+        "building_parts": [
+            {
+                "identifier": "Main Dwelling",
+                "floors": [
+                    {
+                        "area_m2": 43.61,
+                        "height_m": 2.46,
+                        "heat_loss_perimeter_m": 25.31,
+                        "party_wall_length_m": 0.0,
+                    },
+                    {
+                        "area_m2": 42.33,
+                        "height_m": 2.44,
+                        "heat_loss_perimeter_m": 26.16,
+                        "party_wall_length_m": 0.0,
+                    },
+                ],
+                "roof": {
+                    "construction": 4,
+                    "insulation_location": 2,
+                    "insulation_thickness_mm": 100.0,
+                },
+            },
+            {
+                "identifier": "Extension",
+                "floors": [
+                    {
+                        "area_m2": 4.46,
+                        "height_m": 2.24,
+                        "heat_loss_perimeter_m": 6.85,
+                        "party_wall_length_m": 0.0,
+                    }
+                ],
+                "roof": {
+                    "construction": 8,
+                    "insulation_location": 7,
+                },
+            },
+        ],
+    }
+    assert parsed == expected
+
+
+ND_THICKNESS_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
+<Report-Header>
+    <Property>
+        <Address>
+            <Address-Line-1>1</Address-Line-1>
+            <Post-Town>Somewhere</Post-Town>
+            <Postcode>AB1 2CD</Postcode>
+        </Address>
+    </Property>
+</Report-Header>
+<SAP-Data>
+    <SAP-Property-Details>
+        <Property-Type>0</Property-Type>
+        <SAP-Building-Parts>
+            <SAP-Building-Part>
+                <Identifier>Main Dwelling</Identifier>
+                <Roof-Construction>4</Roof-Construction>
+                <Roof-Insulation-Location>2</Roof-Insulation-Location>
+                <Roof-Insulation-Thickness>ND</Roof-Insulation-Thickness>
+                <SAP-Floor-Dimensions>
+                    <SAP-Floor-Dimension>
+                        <Heat-Loss-Perimeter quantity="metres">10.0</Heat-Loss-Perimeter>
+                        <Room-Height quantity="metres">2.5</Room-Height>
+                        <Total-Floor-Area quantity="square metres">50.0</Total-Floor-Area>
+                        <Floor>0</Floor>
+                        <Party-Wall-Length>0</Party-Wall-Length>
+                    </SAP-Floor-Dimension>
+                </SAP-Floor-Dimensions>
+            </SAP-Building-Part>
+        </SAP-Building-Parts>
+    </SAP-Property-Details>
+</SAP-Data>
+</RdSAP-Report>
+"""
+
+ND_INSULATION_LOCATION_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
+<Report-Header>
+    <Property>
+        <Address>
+            <Address-Line-1>1</Address-Line-1>
+            <Post-Town>Somewhere</Post-Town>
+            <Postcode>AB1 2CD</Postcode>
+        </Address>
+    </Property>
+</Report-Header>
+<SAP-Data>
+    <SAP-Property-Details>
+        <Property-Type>0</Property-Type>
+        <SAP-Building-Parts>
+            <SAP-Building-Part>
+                <Identifier>Main Dwelling</Identifier>
+                <Roof-Construction>4</Roof-Construction>
+                <Roof-Insulation-Location>ND</Roof-Insulation-Location>
+                <Roof-Insulation-Thickness>250</Roof-Insulation-Thickness>
+                <SAP-Floor-Dimensions>
+                    <SAP-Floor-Dimension>
+                        <Heat-Loss-Perimeter quantity="metres">10.0</Heat-Loss-Perimeter>
+                        <Room-Height quantity="metres">2.5</Room-Height>
+                        <Total-Floor-Area quantity="square metres">50.0</Total-Floor-Area>
+                        <Floor>0</Floor>
+                        <Party-Wall-Length>0</Party-Wall-Length>
+                    </SAP-Floor-Dimension>
+                </SAP-Floor-Dimensions>
+            </SAP-Building-Part>
+        </SAP-Building-Parts>
+    </SAP-Property-Details>
+</SAP-Data>
+</RdSAP-Report>
+"""
+
+
+def test_parse_rdsap_nd_thickness():
+    """A Roof-Insulation-Thickness of "ND" is kept as text rather than raising.
+
+    "ND" (not determined) is a value seen in the wild for this element.
+    """
+    roof = parse_rdsap(ND_THICKNESS_XML)["building_parts"][0]["roof"]
+
+    # assert
+    assert roof == {
+        "construction": 4,
+        "insulation_location": 2,
+        "insulation_thickness_mm": "ND",
+    }
+
+
+def test_parse_rdsap_nd_location():
+    """A Roof-Insulation-Location of "ND" is kept as text rather than raising.
+
+    "ND" (not determined) is a value seen in the wild for this element.
+    """
+    roof = parse_rdsap(ND_INSULATION_LOCATION_XML)["building_parts"][0]["roof"]
+
+    # assert
+    assert roof == {
+        "construction": 4,
+        "insulation_location": "ND",
+        "insulation_thickness_mm": 250,
+    }
+
+
+def test_flatten_full():
+    """Flattening SAMPLE_XML yields one column per floor and roof attribute.
+
+    Main Dwelling has two floors and a full roof; Extension has one floor and
+    a roof without a thickness, so no extension thickness column appears.
+    """
+    # arrange + act
+    flat = flatten_sap_property(parse_rdsap(SAMPLE_XML))
+
+    # assert
+    expected = {
+        "reference": "1AB245CD",
+        "address": "1, Fake Avenue, Random, AB24 5CD",
+        "property_type": "House",
+        "main_dwelling_floor_1_area_m2": 43.61,
+        "main_dwelling_floor_1_height_m": 2.46,
+        "main_dwelling_floor_1_heat_loss_perimeter_m": 25.31,
+        "main_dwelling_floor_1_party_wall_length_m": 0.0,
+        "main_dwelling_floor_2_area_m2": 42.33,
+        "main_dwelling_floor_2_height_m": 2.44,
+        "main_dwelling_floor_2_heat_loss_perimeter_m": 26.16,
+        "main_dwelling_floor_2_party_wall_length_m": 0.0,
+        "main_dwelling_roof_construction": 4,
+        "main_dwelling_roof_insulation_location": 2,
+        "main_dwelling_roof_insulation_thickness_mm": 100.0,
+        "extension_floor_1_area_m2": 4.46,
+        "extension_floor_1_height_m": 2.24,
+        "extension_floor_1_heat_loss_perimeter_m": 6.85,
+        "extension_floor_1_party_wall_length_m": 0.0,
+        "extension_roof_construction": 8,
+        "extension_roof_insulation_location": 7,
+    }
+    assert flat == expected
+
+
+def test_flatten_no_roof():
+    """A single building part without a roof produces no roof keys at all."""
+    # arrange
+    parsed: SapPropertyDetails = parse_rdsap(NO_ROOF_XML)
+
+    # act
+    flat = flatten_sap_property(parsed)
+
+    # assert
+    expected = {
+        "reference": "5XY12AB",
+        "address": "5, Somewhere, XY1 2AB",
+        "property_type": "House",
+        "main_dwelling_floor_1_area_m2": 50.0,
+        "main_dwelling_floor_1_height_m": 2.5,
+        "main_dwelling_floor_1_heat_loss_perimeter_m": 10.0,
+        "main_dwelling_floor_1_party_wall_length_m": 3.0,
+    }
+    assert flat == expected
diff --git a/backend/ecmk_fetcher/upload.py b/backend/ecmk_fetcher/upload.py
index 0a744e53..cc2c908d 100644
--- a/backend/ecmk_fetcher/upload.py
+++ b/backend/ecmk_fetcher/upload.py
@@ -28,6 +28,19 @@ def upload_file_to_sharepoint(
     )
 
 
+def upload_excel_to_sharepoint(
+    client: DomnaSharepointClient, file_path: str, sharepoint_path: str
+) -> None:
+    """Upload ``file_path`` via ``client``, passing its base name as the file name."""
+    remote_name = os.path.basename(file_path)
+    client.upload_file(
+        file_path=file_path,
+        sharepoint_path=sharepoint_path,
+        file_name=remote_name,
+    )
+
+
+# TODO: this should be moved to somewhere common and called by pashub fetcher
 def upload_file_to_s3_and_update_db(
     bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum
 ) -> None:
diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py
new file mode 100644
index 00000000..f993038b
--- /dev/null
+++ b/backend/ecmk_fetcher/xml_processor.py
@@ -0,0 +1,226 @@
+import xml.etree.ElementTree as ET
+from typing import Any, List, Optional, TypedDict
+
+
+from backend.ecmk_fetcher.reports import build_property_id
+from datatypes.epc.domain.field_mappings import PROPERTY_TYPE_LOOKUP
+
+
+# This file should ultimately live somewhere different, probably
+class Floor(TypedDict):  # one SAP-Floor-Dimension element of a building part
+    area_m2: float  # parsed from Total-Floor-Area
+    height_m: float  # parsed from Room-Height
+    heat_loss_perimeter_m: float  # parsed from Heat-Loss-Perimeter
+    party_wall_length_m: float  # parsed from Party-Wall-Length
+
+
+class Roof(TypedDict, total=False):  # a key is set only when its XML element exists
+    construction: int  # TODO: map to str
+    insulation_location: int | str  # raw text when not an int, e.g. "ND"; TODO: map to str
+    insulation_thickness_mm: float | str  # "mm" removed; text when not numeric, e.g. "ND"
+
+
+class BuildingPart(TypedDict):  # one SAP-Building-Part element
+    identifier: str  # e.g. "Main Dwelling", "Extension"
+    floors: List[Floor]  # one entry per SAP-Floor-Dimension found under the part
+    roof: Optional[Roof]  # None when the part has none of the roof elements
+
+
+class SapPropertyDetails(TypedDict):  # result of parse_rdsap for one report
+    reference: str  # build_property_id(Address-Line-1, Postcode)
+    address: str  # non-empty address parts joined with ", "
+    property_type: str  # PROPERTY_TYPE_LOOKUP entry for the Property-Type code
+    building_parts: List[BuildingPart]  # one entry per SAP-Building-Part element
+
+
+def _get_namespace(tag: str) -> str:  # "{uri}Local" -> "uri"; "" when tag has no namespace
+    return tag[1:].partition("}")[0] if tag.startswith("{") else ""
+
+
+def _require_text(value: Optional[str], field: str) -> str:  # ValueError when value is None
+    if value is not None:
+        return value
+    raise ValueError(f"Missing required field: {field}")
+
+
+def _parse_float(value: Optional[str], field: str) -> float:  # ValueError if None/invalid
+    if value is not None:
+        return float(value)
+    raise ValueError(f"Missing float field: {field}")
+
+
+def _parse_int(value: Optional[str], field: str) -> int:  # ValueError if None/invalid
+    if value is not None:
+        return int(value)
+    raise ValueError(f"Missing int field: {field}")
+
+
+def _parse_thickness_mm(value: Optional[str]) -> Optional[float | str]:
+    """Return "100mm" as 100.0, None as None, and non-numeric text stripped."""
+    text = None if value is None else value.replace("mm", "").strip()
+    try:
+        # Codes such as "ND" are not numbers; float() rejects them below.
+        return None if text is None else float(text)
+    except ValueError:
+        return text
+
+
+def _parse_floor(elem: ET.Element, ns: dict[str, str]) -> Floor:
+    """Build a ``Floor`` from one SAP-Floor-Dimension element.
+
+    Raises:
+        ValueError: if a measurement is missing or is not a valid float.
+    """
+
+    def measure(tag: str) -> float:
+        return _parse_float(elem.findtext(f"r:{tag}", namespaces=ns), tag)
+
+    # A dict display evaluates its values top to bottom, so the first missing
+    # measurement in this order is the one named in the error.
+    return {
+        "area_m2": measure("Total-Floor-Area"),
+        "height_m": measure("Room-Height"),
+        "heat_loss_perimeter_m": measure("Heat-Loss-Perimeter"),
+        "party_wall_length_m": measure("Party-Wall-Length"),
+    }
+
+
+def _parse_roof(part: ET.Element, ns: dict[str, str]) -> Optional[Roof]:
+    """Collect the roof fields of one SAP-Building-Part element.
+
+    Args:
+        part: the SAP-Building-Part element.
+        ns: prefix map binding ``r`` to the document namespace.
+
+    Returns:
+        None when none of the three roof elements is present; otherwise a
+        ``Roof`` holding a key for each element that is.
+
+    Raises:
+        ValueError: if Roof-Construction is present but not an integer.
+    """
+    construction_text = part.findtext("r:Roof-Construction", namespaces=ns)
+    location_text = part.findtext("r:Roof-Insulation-Location", namespaces=ns)
+    thickness_text = part.findtext("r:Roof-Insulation-Thickness", namespaces=ns)
+
+    if construction_text is None and location_text is None and thickness_text is None:
+        return None
+
+    roof: Roof = {}
+
+    if construction_text is not None:
+        roof["construction"] = _parse_int(construction_text, "Roof-Construction")
+
+    if location_text is not None:
+        try:
+            roof["insulation_location"] = _parse_int(
+                location_text, "Roof-Insulation-Location"
+            )
+        except ValueError:
+            # Non-integer codes such as "ND" are kept as the raw text.
+            roof["insulation_location"] = location_text
+
+    thickness = _parse_thickness_mm(thickness_text)
+    if thickness is not None:
+        roof["insulation_thickness_mm"] = thickness
+
+    return roof
+
+
+def parse_rdsap(xml_string: str) -> SapPropertyDetails:
+    """Parse an RdSAP report XML string into ``SapPropertyDetails``.
+
+    The namespace URI is taken from the root tag and bound to the ``r`` prefix
+    used by every lookup below.
+
+    Args:
+        xml_string: the full XML document.
+
+    Returns:
+        The reference, joined address, property type and, for every
+        SAP-Building-Part, its identifier, floors and optional roof.
+
+    Raises:
+        ValueError: if the Address element, Property-Type, a part's Identifier
+            or a floor measurement is missing, or a numeric field is malformed.
+    """
+    root = ET.fromstring(xml_string)
+    ns: dict[str, str] = {"r": _get_namespace(root.tag)}
+
+    # --- Address ---
+    addr_elem = root.find(".//r:Address", ns)
+    if addr_elem is None:
+        raise ValueError("Address element not found")
+
+    address_line_1, address_line_2, post_town, postcode = (
+        addr_elem.findtext(f"r:{tag}", default="", namespaces=ns)
+        for tag in ("Address-Line-1", "Address-Line-2", "Post-Town", "Postcode")
+    )
+    # Absent or empty parts are left out so the address has no stray separators.
+    address = ", ".join(
+        part for part in (address_line_1, address_line_2, post_town, postcode) if part
+    )
+    reference: str = build_property_id(address_line_1, postcode)
+
+    # --- Property Type ---
+    prop_type_code = _parse_int(
+        root.findtext(".//r:Property-Type", namespaces=ns), "Property-Type"
+    )
+    # NOTE(review): presumably a KeyError for an unmapped code - confirm.
+    property_type: str = PROPERTY_TYPE_LOOKUP[prop_type_code]
+
+    # --- Building Parts ---
+    building_parts: List[BuildingPart] = []
+    for bp in root.findall(".//r:SAP-Building-Part", ns):
+        identifier = _require_text(
+            bp.findtext("r:Identifier", namespaces=ns), "Identifier"
+        )
+        floor_elems = bp.findall(".//r:SAP-Floor-Dimension", ns)
+        floors: List[Floor] = [_parse_floor(f, ns) for f in floor_elems]
+        building_parts.append(
+            {"identifier": identifier, "floors": floors, "roof": _parse_roof(bp, ns)}
+        )
+
+    return {
+        "reference": reference,
+        "address": address,
+        "property_type": property_type,
+        "building_parts": building_parts,
+    }
+
+
+def _normalise_identifier(identifier: str) -> str:  # "Main Dwelling" -> "main_dwelling"
+    return identifier.lower().translate(str.maketrans(" -", "__"))
+
+
+def flatten_sap_property(details: SapPropertyDetails) -> dict[str, Any]:
+    """Flatten parsed details into one single-level row.
+
+    Floor and roof values are keyed ``<part>_floor_<n>_<field>`` and
+    ``<part>_roof_<field>``, where ``<part>`` is the normalised identifier and
+    ``<n>`` counts floors from 1 in list order. Roof keys that are absent from
+    the parsed roof produce no column.
+    """
+    per_floor = ("area_m2", "height_m", "heat_loss_perimeter_m", "party_wall_length_m")
+    per_roof = ("construction", "insulation_location", "insulation_thickness_mm")
+
+    row: dict[str, Any] = {
+        "reference": details["reference"],
+        "address": details["address"],
+        "property_type": details["property_type"],
+    }
+
+    for part in details["building_parts"]:
+        prefix = _normalise_identifier(part["identifier"])
+
+        for number, floor in enumerate(part["floors"], start=1):
+            for field in per_floor:
+                row[f"{prefix}_floor_{number}_{field}"] = floor[field]
+
+        # A missing or empty roof contributes no columns.
+        roof = part.get("roof") or {}
+        for field in per_roof:
+            if field in roof:
+                row[f"{prefix}_roof_{field}"] = roof[field]
+
+    return row
diff --git a/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx b/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx
index a6478e3b..beb679c1 100644
Binary files a/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx and b/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx differ
diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile
index d045becd..e450d340 100644
--- a/backend/pashub_fetcher/handler/Dockerfile
+++ b/backend/pashub_fetcher/handler/Dockerfile
@@ -22,5 +22,5 @@ ENTRYPOINT ["python", "-m", "awslambdaric"]
 # -----------------------------
 # Lambda handler
 # -----------------------------
-CMD ["backend.pashub_fetcher.handler.test_handler.handler"]
+CMD ["backend.pashub_fetcher.handler.handler.handler"]
 # CMD ["backend.pashub_fetcher.handler.handler.handler"]
\ No newline at end of file
diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py
index 3689efe9..60b946c1 100644
--- a/backend/pashub_fetcher/handler/handler.py
+++ b/backend/pashub_fetcher/handler/handler.py
@@ -1,8 +1,7 @@
 from datetime import datetime, timezone
-import json
 import os
 import re
-from typing import Any, Dict, List, Mapping, Optional
+from typing import Any, Dict, List, Optional
 from openpyxl import load_workbook
 
 from backend.app.config import get_settings
@@ -104,10 +103,19 @@ def upload_job_to_sharepoint(
         )
 
 
-def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None:
+def upload_job_to_s3_and_update_db(
+    job_files: List[str], uprn: Optional[str], hubspot_deal_id: Optional[str]
+) -> None:
     bucket = "retrofit-energy-assessments-dev"
 
-    base_path = f"documents/uprn/{uprn}"
+    if not uprn and not hubspot_deal_id:
+        return
+
+    base_path = (
+        f"documents/uprn/{uprn}"
+        if uprn
+        else f"documents/hubspot_deal_id/{hubspot_deal_id}"
+    )
 
     uploaded_files: List[UploadedFile] = []
 
@@ -118,12 +126,14 @@ def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None:
         upload_file_to_s3(file_path, bucket, file_key)
 
         # load row to db
+        # TODO: use same upload_file_to_s3_and_update_db method as ecmk fetcher does
         uploaded_files.append(
             UploadedFile(
                 s3_file_bucket=bucket,
                 s3_file_key=file_key,
                 s3_upload_timestamp=datetime.now(timezone.utc),
-                uprn=int(uprn),
+                uprn=int(uprn) if uprn else None,
+                hubspot_deal_id=hubspot_deal_id,
                 file_source=FileSourceEnum.PAS_HUB.value,
                 file_type=infer_file_type(filename),
             )
@@ -144,6 +154,7 @@ def process_job(
     job_id = job.pashub_job_id
 
     uprn: Optional[str] = job.uprn or pashub_client.get_uprn_by_job_id(job_id)
+    hubspot_deal_id: Optional[str] = job.hubspot_deal_id
 
     if uprn:
         logger.info(f"Got UPRN {uprn} for job {job_id}")
@@ -152,9 +163,9 @@ def process_job(
 
     job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(job_id)
 
-    if uprn:
+    if uprn or hubspot_deal_id:
         logger.info("Uploading files to s3")
-        upload_job_to_s3_and_update_db(job_files, uprn)
+        upload_job_to_s3_and_update_db(job_files, uprn, hubspot_deal_id)
 
     # # Comment out sharepoint loading for now:
     # Seems like the sharepoint link in pas hub is inconsistent in terms
@@ -167,9 +178,8 @@ def process_job(
 
 
 @task_handler()
-def handler(event: Mapping[str, Any], context: Any) -> None:
+def handler(body: Dict[str, Any], context: Any) -> List[str]:
     logger.info("Received message")
-    logger.info(f"Number of events: {len(event.get('Records', []))}")
 
     settings = get_settings()
 
@@ -185,48 +195,34 @@ def handler(event: Mapping[str, Any], context: Any) -> None:
         sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
     )
 
-    saved_file_paths: List[str] = []
+    logger.debug("Validating request body")
+    payload = PashubToAraTriggerRequest.model_validate(body)
+    logger.debug("Successfully validated request body")
 
-    for record in event.get("Records", []):
-        try:
-            body_dict = json.loads(record["body"])
-            logger.debug("Validating request body")
+    try:
+        files: List[str] = process_job(
+            payload,
+            pashub_client,
+            sharepoint_client,
+        )
+    except UnauthorizedError:
+        logger.warning("Token expired - refreshing")
 
-            payload = PashubToAraTriggerRequest.model_validate(body_dict)
+        pashub_client = get_pashub_client(
+            pas_hub_email,
+            pas_hub_password,
+        )
 
-            logger.debug("Successfully validated request body")
+        # retry once
+        files = process_job(
+            payload,
+            pashub_client,
+            sharepoint_client,
+        )
 
-            try:
-                files: List[str] = process_job(
-                    payload,
-                    pashub_client,
-                    sharepoint_client,
-                )
-                saved_file_paths.extend(files)
+    logger.info(f"Saved {len(files)} files")
 
-            except UnauthorizedError:
-                logger.warning("Token expired - refreshing")
-
-                pashub_client = get_pashub_client(
-                    pas_hub_email,
-                    pas_hub_password,
-                )
-
-                # retry once
-                files: List[str] = process_job(
-                    payload,
-                    pashub_client,
-                    sharepoint_client,
-                )
-                saved_file_paths.extend(files)
-
-        except Exception as e:
-            logger.info("Handler exception")
-            logger.error(f"Failed to process record: {e}")
-
-    logger.info("Successfully loaded jobs from spreadsheet")
-
-    logger.info(f"Saved {len(saved_file_paths)} files")
+    return files
 
 
 if __name__ == "__main__":
diff --git a/backend/pashub_fetcher/local_handler/invoke_local_lambda.py b/backend/pashub_fetcher/local_handler/invoke_local_lambda.py
index 463ef9d8..219446fd 100644
--- a/backend/pashub_fetcher/local_handler/invoke_local_lambda.py
+++ b/backend/pashub_fetcher/local_handler/invoke_local_lambda.py
@@ -12,7 +12,8 @@ payload = {
         {
             "body": json.dumps(
                 {
-                    "uprn": 123456,
+                    "pashub_link": "https://pashub.net/jobs/00000000-0000-0000-0000-000000000000/details",
+                    "uprn": "123456",
                 }
             )
         }
diff --git a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py
index 2e4f8380..518a8dc3 100644
--- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py
+++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py
@@ -12,6 +12,8 @@ class PashubToAraTriggerRequest(BaseModel):
     uprn: Optional[str] = None
     landlord_property_id: Optional[str] = None
     deal_stage: Optional[str] = None
+    hubspot_listing_id: Optional[int] = None
+    hubspot_deal_id: Optional[str] = None
 
     @property
     def pashub_job_id(self) -> str:
diff --git a/backend/pashub_fetcher/trigger_lambda_from_file.py b/backend/pashub_fetcher/trigger_lambda_from_file.py
new file mode 100644
index 00000000..fb9d1cbf
--- /dev/null
+++ b/backend/pashub_fetcher/trigger_lambda_from_file.py
@@ -0,0 +1,63 @@
+import json
+import os
+import re
+from typing import Any, Dict, List
+
+from openpyxl import load_workbook
+
+from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
+    PashubToAraTriggerRequest,
+)
+from backend.pashub_fetcher.handler.handler import handler
+
+
+if __name__ == "__main__":
+    BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+    filepath: str = os.path.join(
+        BASE_DIR,
+        "pashub_fetcher",
+        "The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx",
+    )
+
+    wb = load_workbook(filepath, data_only=True)
+    ws = wb["filtered_2"]
+
+    HEADER_ROW = 3
+
+    # Check the raw cell value: str(None) is "None", which is truthy.
+    headers: Dict[str, int] = {}
+    for col in range(1, ws.max_column + 1):
+        value = ws.cell(row=HEADER_ROW, column=col).value
+        if value is not None and str(value).strip():
+            headers[str(value).strip()] = col
+
+    name_col = headers["Name"]
+    link_col = headers["PasHub Link"]
+    hubspot_deal_id_col = headers["HubSpot ID"]
+
+    trigger_requests: List[PashubToAraTriggerRequest] = []
+    for row in range(HEADER_ROW + 1, ws.max_row + 1):
+        name = ws.cell(row=row, column=name_col).value
+        link = ws.cell(row=row, column=link_col).value
+        hubspot_deal_id = ws.cell(row=row, column=hubspot_deal_id_col).value
+
+        if not name or not link or not hubspot_deal_id:
+            continue
+
+        # Skip rows whose link does not contain a /jobs/<id>/ segment.
+        if not re.search(r"/jobs/([0-9a-fA-F\-]+)/", str(link)):
+            continue
+
+        trigger_requests.append(
+            PashubToAraTriggerRequest(
+                pashub_link=str(link), hubspot_deal_id=str(hubspot_deal_id)
+            )
+        )
+
+    # ---- Build fake SQS event ----
+    event: Dict[str, Any] = {
+        "Records": [{"body": json.dumps(req.model_dump())} for req in trigger_requests]
+    }
+
+    context = None
+    handler(event, context)
diff --git a/datatypes/epc/domain/field_mappings.py b/datatypes/epc/domain/field_mappings.py
new file mode 100644
index 00000000..cc0f9067
--- /dev/null
+++ b/datatypes/epc/domain/field_mappings.py
@@ -0,0 +1,3 @@
+PROPERTY_TYPE_LOOKUP = {0: "House", 1: "Bungalow", 2: "Flat", 3: "Maisonette"}
+ROOF_CONSTRUCTION_LOOKUP = {}  # NOTE(review): empty, no codes mapped; confirm
+ROOF_INSULATION_LOCATION_LOOKUP = {}  # NOTE(review): empty, no codes mapped; confirm
diff --git a/etl/hubspot/hubspot_deal_differ.py b/etl/hubspot/hubspot_deal_differ.py
index b95b544c..74c8264d 100644
--- a/etl/hubspot/hubspot_deal_differ.py
+++ b/etl/hubspot/hubspot_deal_differ.py
@@ -6,9 +6,9 @@ from etl.hubspot.utils import parse_hs_date
 
 class HubspotDealDiffer:
     COORDINATION_COMPLETE: List[str] = [
-        "v1 ioe/mtp complete",
-        "v2 ioe/mtp complete",
-        "v3 ioe/mtp complete",
+        "(v1) ioe/mtp complete",
+        "(v2) ioe/mtp complete",
+        "(v3) ioe/mtp complete",
     ]
     RETROFIT_DESIGN_COMPLETE = "uploaded"
     LODGEMENT_COMPLETE: List[str] = ["lodgement complete", "measures lodged"]
@@ -149,19 +149,19 @@ class HubspotDealDiffer:
     def _coordination_completed(
         new_deal: Dict[str, str], old_deal: HubspotDealData
     ) -> bool:
-        new_status: str = new_deal.get("coordination_status", "")
+        new_status: str = new_deal.get("coordination_status") or ""
         return (
             new_status != ""
-            and new_status in HubspotDealDiffer.COORDINATION_COMPLETE
+            and new_status.lower() in HubspotDealDiffer.COORDINATION_COMPLETE
             and new_status != old_deal.coordination_status
         )
 
     @staticmethod
     def _design_completed(new_deal: Dict[str, str], old_deal: HubspotDealData) -> bool:
-        new_status: str = new_deal.get("design_status", "")
+        new_status: str = new_deal.get("design_status") or ""  # missing/None -> ""
         return (
             new_status != ""
-            and new_status == HubspotDealDiffer.RETROFIT_DESIGN_COMPLETE
+            and new_status.lower() == HubspotDealDiffer.RETROFIT_DESIGN_COMPLETE
             and new_status != old_deal.design_status
         )
 
@@ -169,9 +169,9 @@ class HubspotDealDiffer:
     def _lodgement_completed(
         new_deal: Dict[str, str], old_deal: HubspotDealData
     ) -> bool:
-        new_status: str = new_deal.get("lodgement_status", "")
+        new_status: str = new_deal.get("lodgement_status") or ""  # missing/None -> ""
         return (
             new_status != ""
-            and new_status in HubspotDealDiffer.LODGEMENT_COMPLETE
+            and new_status.lower() in HubspotDealDiffer.LODGEMENT_COMPLETE
             and new_status != old_deal.lodgement_status
         )
diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py
index e6c788ea..f7dc1076 100644
--- a/etl/hubspot/scripts/scraper/main.py
+++ b/etl/hubspot/scripts/scraper/main.py
@@ -104,10 +104,10 @@ def _trigger_pashub_fetcher(sqs_client: Any, hubspot_deal: Dict[str, str]) -> No
     message_body: Dict[str, Optional[str]] = {
         "pashub_link": hubspot_deal["pashub_link"],
         "address": None,  # potentially available from Listing, leave as None for now
-        "sharepoint_link": hubspot_deal["sharepoint_link"],
-        "uprn": hubspot_deal["national_uprn"],
-        "landlord_property_id": hubspot_deal["owner_property_id"],
-        "deal_stage": hubspot_deal["deal_stage"],
+        "sharepoint_link": hubspot_deal.get("sharepoint_link", None),
+        "uprn": hubspot_deal.get("national_uprn", None),
+        "landlord_property_id": hubspot_deal.get("owner_property_id", None),
+        "deal_stage": hubspot_deal.get("deal_stage", None),
     }
 
     response = sqs_client.send_message(
@@ -121,5 +121,5 @@ def _trigger_pashub_fetcher(sqs_client: Any, hubspot_deal: Dict[str, str]) -> No
 
 
 if __name__ == "__main__":
-    handler({"hubspot_deal_id": "371470706915"}, "")
+    handler({"hubspot_deal_id": "498926855369"}, "")
     print("beep")
diff --git a/pytest.ini b/pytest.ini
index 6cb3b611..33231c61 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,6 +3,6 @@ pythonpath = .
 log_cli = true
 log_cli_level = INFO
 addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
-testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/documents_parser/tests
+testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/documents_parser/tests
 markers =
     integration: mark a test as an integration test
diff --git a/utils/sharepoint/domna_sharepoint_client.py b/utils/sharepoint/domna_sharepoint_client.py
index 67e079ed..5e0255ac 100644
--- a/utils/sharepoint/domna_sharepoint_client.py
+++ b/utils/sharepoint/domna_sharepoint_client.py
@@ -90,6 +90,41 @@ class DomnaSharepointClient:
             file_name, get_file_stream(file_path), sharepoint_path
         )
 
+    def download_file(self, sharepoint_path: str, local_path: str) -> bool:
+        """
+        Fetch a SharePoint file and write its bytes to ``local_path``.
+
+        Returns False when the metadata lookup raises ValueError or carries no
+        download URL, True once the file is written. Other errors propagate.
+        """
+        client = SharePointClient(
+            tenant_id=self.sharepoint_tenant_id,
+            client_id=self.sharepoint_client_id,
+            client_secret=self.sharepoint_client_secret,
+            site_id=self.sharepoint_drive.value,
+        )
+
+        try:
+            file_info: Dict[str, Any] = client.get_file_metadata(sharepoint_path)
+        except ValueError:
+            # Treated as "file does not exist yet" rather than a failure.
+            return False
+
+        source_url: Optional[str] = file_info.get("@microsoft.graph.downloadUrl")
+        if not source_url:
+            return False
+
+        payload: BytesIO = SharePointClient.download_sharepoint_file(source_url)
+
+        target_dir = os.path.dirname(local_path)
+        if target_dir:
+            os.makedirs(target_dir, exist_ok=True)
+        with open(local_path, "wb") as out:
+            out.write(payload.getvalue())
+
+        self.logger.debug(f"Downloaded SharePoint file to: {local_path}")
+        return True
+
     def create_temp_file(self, content: BytesIO, path: str):
         # Ensure the path is under /tmp/
         new_path = os.path.join("/tmp/sharepoint", path)
diff --git a/utils/sharepoint/sharepoint_client.py b/utils/sharepoint/sharepoint_client.py
index 71f82b68..5807c3bd 100644
--- a/utils/sharepoint/sharepoint_client.py
+++ b/utils/sharepoint/sharepoint_client.py
@@ -278,6 +278,17 @@ class SharePointClient:
         # logger.debug(f"Listing folder contents from URL: {url}")
         return "GET", url, None
 
+    @api_call_decorator
+    def get_file_metadata(self, file_path: str) -> Dict[str, Any]:
+        """
+        Look up a drive item by path: GET /drives/{drive-id}/root:/{file_path}
+
+        The returned metadata includes '@microsoft.graph.downloadUrl'.
+        Raises ValueError if the file does not exist (404).
+        """
+        base = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}"
+        return "GET", f"{base}/root:/{file_path}", None
+
     @api_call_decorator
     def create_folder(self, file_name: str, folder_path: str) -> Dict[str, Any]:
         """
@@ -325,7 +336,7 @@ class SharePointClient:
                 return self.upload_file(file_name, sharepoint_parent_id, file_stream)
 
     @staticmethod
-    def download_sharepoint_file(download_url):
+    def download_sharepoint_file(download_url: str) -> BytesIO:
         """
         Downloads a file from the given URL and returns its content.