diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py
index 71763790..5b34a752 100644
--- a/backend/app/db/models/uploaded_file.py
+++ b/backend/app/db/models/uploaded_file.py
@@ -16,6 +16,7 @@ class FileTypeEnum(enum.Enum):
PAS_2023_OCCUPANCY = "pas_2023_occupancy"
ECMK_SITE_NOTE = "ecmk_site_note"
ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
+ ECMK_SURVEY_XML = "ecmk_survey_xml"
class FileSourceEnum(enum.Enum):
diff --git a/backend/ecmk_fetcher/excel_writer.py b/backend/ecmk_fetcher/excel_writer.py
new file mode 100644
index 00000000..f290614b
--- /dev/null
+++ b/backend/ecmk_fetcher/excel_writer.py
@@ -0,0 +1,53 @@
+import os
+from typing import Any
+
+from openpyxl import Workbook, load_workbook
+from openpyxl.worksheet.worksheet import Worksheet
+
+
def write_row(file_path: str, row_data: dict[str, Any]) -> None:
    """Append ``row_data`` as one row of the workbook stored at ``file_path``.

    If the file does not exist, a new workbook is created whose first row
    holds the keys of ``row_data`` and whose second row holds its values.

    If the file exists, row 1 of its active sheet is treated as the header.
    Keys of ``row_data`` that are not yet headers become new columns: each is
    inserted immediately before the next key of ``row_data`` that already is
    a header, or appended after the last header when no such key exists.
    The new row is then written aligned to the header; columns with no
    matching key are left empty.

    Args:
        file_path: Location of the ``.xlsx`` file to create or extend.
        row_data: Mapping of column name to cell value for the new row.

    Raises:
        ValueError: If the workbook's active sheet is not a worksheet.
    """
    new_keys = list(row_data)

    if not os.path.exists(file_path):
        wb = Workbook()
        ws = wb.active
        if not isinstance(ws, Worksheet):
            raise ValueError(f"Workbook has no active worksheet: {file_path}")
        ws.append(new_keys)
        ws.append(list(row_data.values()))
        wb.save(file_path)
        return

    wb = load_workbook(file_path)
    ws = wb.active
    if not isinstance(ws, Worksheet):
        raise ValueError(f"Workbook has no active worksheet: {file_path}")

    # Keep one entry per sheet column so that list index + 1 is always the
    # real column number. Blank header cells between real headers stay in
    # place as None; only trailing blanks are dropped, so appended columns
    # follow the last real header.
    headers: list[Any] = [cell.value for cell in ws[1]]
    while headers and headers[-1] is None:
        headers.pop()

    for position, key in enumerate(new_keys):
        if key in headers:
            continue

        # First later key of this row that is already a column; inserting
        # before it keeps related columns grouped together.
        insert_before = next(
            (later for later in new_keys[position + 1:] if later in headers),
            None,
        )

        if insert_before is None:
            headers.append(key)
            ws.cell(row=1, column=len(headers), value=key)
        else:
            col_idx = headers.index(insert_before) + 1  # 1-based
            # insert_cols shifts the existing cells right, so earlier rows
            # stay under their own headers.
            ws.insert_cols(col_idx)
            ws.cell(row=1, column=col_idx, value=key)
            headers.insert(col_idx - 1, key)

    # A blank (None) header never matches a key, so its cell stays empty.
    ws.append([row_data.get(col) for col in headers])
    wb.save(file_path)
diff --git a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py
index 2f122080..4f8c24ea 100644
--- a/backend/ecmk_fetcher/processor.py
+++ b/backend/ecmk_fetcher/processor.py
@@ -26,13 +26,17 @@ from backend.ecmk_fetcher.browser import (
)
from backend.ecmk_fetcher.reports import (
REPORT_TYPES,
+ FileDownloadButtonType,
build_property_id,
map_report_type_to_db_file_type,
)
+from backend.ecmk_fetcher.excel_writer import write_row
from backend.ecmk_fetcher.upload import (
+ upload_excel_to_sharepoint,
upload_file_to_s3_and_update_db,
upload_file_to_sharepoint,
)
+from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
@@ -59,6 +63,15 @@ def run_job() -> None:
)
sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
+ sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling"
+
+ DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
+ local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME)
+
+ sharepoint_client.download_file(
+ sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
+ local_path=local_dimensions_path,
+ )
s3_bucket: str = "retrofit-energy-assessments-dev"
@@ -141,15 +154,30 @@ def run_job() -> None:
)
try:
- upload_file_to_sharepoint(
- client=sharepoint_client,
- file_path=file_path,
- base_path=sharepoint_base_path,
- subpath=sharepoint_address,
- )
- logger.info(
- f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
- )
+ if report_type == FileDownloadButtonType.RAW_XML.value:
+ with open(file_path, "r", encoding="utf-8") as f:
+ xml_string = f.read()
+ details = parse_rdsap(xml_string)
+ row_data = flatten_sap_property(details)
+ write_row(local_dimensions_path, row_data)
+ upload_excel_to_sharepoint(
+ client=sharepoint_client,
+ file_path=local_dimensions_path,
+ sharepoint_path=sharepoint_excel_path,
+ )
+ logger.info(
+ f"Written dimensions row and uploaded Dimensions.xlsx for {address}"
+ )
+ else:
+ upload_file_to_sharepoint(
+ client=sharepoint_client,
+ file_path=file_path,
+ base_path=sharepoint_base_path,
+ subpath=sharepoint_address,
+ )
+ logger.info(
+ f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
+ )
# Upload to s3 and update db
upload_file_to_s3_and_update_db(
diff --git a/backend/ecmk_fetcher/reports.py b/backend/ecmk_fetcher/reports.py
index d8d11d50..d2f8ea52 100644
--- a/backend/ecmk_fetcher/reports.py
+++ b/backend/ecmk_fetcher/reports.py
@@ -14,6 +14,7 @@ class FileDownloadButtonType(Enum):
REPORT_TYPES = [
FileDownloadButtonType.ASSESSOR_HUB_SITENOTE_REPORT.value,
FileDownloadButtonType.SITENOTE_REPORT.value,
+ FileDownloadButtonType.RAW_XML.value,
]
@@ -23,6 +24,8 @@ def map_report_type_to_db_file_type(report_type: int) -> FileTypeEnum:
return FileTypeEnum.ECMK_SITE_NOTE
case FileDownloadButtonType.SITENOTE_REPORT.value:
return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE
+ case FileDownloadButtonType.RAW_XML.value:
+ return FileTypeEnum.ECMK_SURVEY_XML
case _:
raise ValueError("Unknown report type")
diff --git a/backend/ecmk_fetcher/tests/test_excel_writer.py b/backend/ecmk_fetcher/tests/test_excel_writer.py
new file mode 100644
index 00000000..3f730951
--- /dev/null
+++ b/backend/ecmk_fetcher/tests/test_excel_writer.py
@@ -0,0 +1,123 @@
+import os
+import pathlib
+import pytest
+from openpyxl import load_workbook
+from openpyxl.worksheet.worksheet import Worksheet
+
+from backend.ecmk_fetcher.excel_writer import write_row
+
+
@pytest.fixture
def xlsx_path(tmp_path: pathlib.Path) -> str:
    """Return, as a string, the path of ``output.xlsx`` inside ``tmp_path``.

    The file itself is not created here.
    """
    target = tmp_path / "output.xlsx"
    return str(target)
+
+
def _active_sheet(file_path: str) -> Worksheet:
    """Load the workbook at ``file_path`` and return its active worksheet."""
    workbook = load_workbook(file_path)
    sheet = workbook.active
    # Narrows the type for the checker and fails fast on a non-worksheet.
    assert isinstance(sheet, Worksheet)
    return sheet
+
+
def test_write_row_creates_file(xlsx_path: str):
    """A missing workbook is created with a header row and one data row."""
    # arrange
    record = {
        "address": "1 Fake Avenue, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
    }

    # act
    write_row(xlsx_path, record)

    # assert
    assert os.path.exists(xlsx_path)
    sheet = _active_sheet(xlsx_path)
    header_cells, data_cells = sheet[1], sheet[2]
    assert [cell.value for cell in header_cells] == list(record)
    assert [cell.value for cell in data_cells] == list(record.values())
+
+
def test_write_row_appends_to_existing(xlsx_path: str):
    """A second row with the same keys is appended under the same header."""
    # arrange
    first = {
        "address": "1 Fake Avenue, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
    }
    second = {
        "address": "2 Other Street, XY1 2AB",
        "property_type": "Flat",
        "main_dwelling_floor_1_area_m2": 30.0,
    }

    # act
    for record in (first, second):
        write_row(xlsx_path, record)

    # assert
    sheet = _active_sheet(xlsx_path)
    assert sheet.max_row == 3  # 1 header + 2 data rows
    header, row_one, row_two = (
        [cell.value for cell in sheet[number]] for number in (1, 2, 3)
    )
    assert header == list(first)
    assert row_one == list(first.values())
    assert row_two == list(second.values())
+
+
def test_write_row_inserts_new_columns_at_logical_positions(xlsx_path: str):
    """Columns first seen in a later row are slotted in before the next known column."""
    # arrange
    # The first record has main_dwelling floor 1 + roof only. The second also
    # has floor 2, whose columns should land between floor 1 and roof rather
    # than being appended to the end.
    initial = {
        "address": "1 Fake Avenue, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
        "main_dwelling_floor_1_height_m": 2.46,
        "main_dwelling_roof_construction": 4,
    }
    extended = {
        "address": "2 Other Street, XY1 2AB",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 50.0,
        "main_dwelling_floor_1_height_m": 2.5,
        "main_dwelling_floor_2_area_m2": 48.0,
        "main_dwelling_floor_2_height_m": 2.4,
        "main_dwelling_roof_construction": 4,
    }
    expected_header = [
        "address",
        "property_type",
        "main_dwelling_floor_1_area_m2",
        "main_dwelling_floor_1_height_m",
        "main_dwelling_floor_2_area_m2",  # inserted before roof, not at end
        "main_dwelling_floor_2_height_m",
        "main_dwelling_roof_construction",
    ]
    # The first record had no floor_2 data, so those cells should be empty.
    expected_initial_row = [
        "1 Fake Avenue, AB24 5CD",
        "House",
        43.61,
        2.46,
        None,  # main_dwelling_floor_2_area_m2
        None,  # main_dwelling_floor_2_height_m
        4,
    ]
    # The second record should be fully populated.
    expected_extended_row = [
        "2 Other Street, XY1 2AB",
        "House",
        50.0,
        2.5,
        48.0,
        2.4,
        4,
    ]

    # act
    write_row(xlsx_path, initial)
    write_row(xlsx_path, extended)

    # assert
    sheet = _active_sheet(xlsx_path)
    assert [cell.value for cell in sheet[1]] == expected_header
    assert [cell.value for cell in sheet[2]] == expected_initial_row
    assert [cell.value for cell in sheet[3]] == expected_extended_row
diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py
new file mode 100644
index 00000000..3695b09d
--- /dev/null
+++ b/backend/ecmk_fetcher/tests/test_xml_processor.py
@@ -0,0 +1,329 @@
+from backend.ecmk_fetcher.xml_processor import (
+ SapPropertyDetails,
+ flatten_sap_property,
+ parse_rdsap,
+)
+
+
+SAMPLE_XML = """
+
+
+
+ 1
+ Fake Avenue
+ Random
+ AB24 5CD
+
+
+
+
+
+
+ 0
+
+
+
+
+ 1
+ Main Dwelling
+ C
+ 7
+
+ 4
+ 2
+ 100mm
+
+ 4
+ 4
+
+
+
+ 25.31
+ 2.46
+ 43.61
+ 0
+ 0
+
+
+
+ 26.16
+ 2.44
+ 42.33
+ 1
+ 0
+
+
+
+
+
+
+ 2
+ Extension
+ C
+
+ 8
+ 7
+ AB
+
+ 3
+ 4
+
+
+
+ 6.85
+ 2.24
+ 4.46
+ 0
+ 0
+
+
+
+
+
+
+
+
+
+
+"""
+
+
+NO_ROOF_XML = """
+
+
+
+ 5
+ Somewhere
+ XY1 2AB
+
+
+
+
+
+ 0
+
+
+ Main Dwelling
+
+
+ 10.0
+ 2.5
+ 50.0
+ 0
+ 3.0
+
+
+
+
+
+
+
+"""
+
+
def test_parse_rdsap_contract():
    """parse_rdsap returns the full nested structure for SAMPLE_XML."""
    # arrange
    main_dwelling = {
        "identifier": "Main Dwelling",
        "floors": [
            {
                "area_m2": 43.61,
                "height_m": 2.46,
                "heat_loss_perimeter_m": 25.31,
                "party_wall_length_m": 0.0,
            },
            {
                "area_m2": 42.33,
                "height_m": 2.44,
                "heat_loss_perimeter_m": 26.16,
                "party_wall_length_m": 0.0,
            },
        ],
        "roof": {
            "construction": 4,
            "insulation_location": 2,
            "insulation_thickness_mm": 100.0,
        },
    }
    extension = {
        "identifier": "Extension",
        "floors": [
            {
                "area_m2": 4.46,
                "height_m": 2.24,
                "heat_loss_perimeter_m": 6.85,
                "party_wall_length_m": 0.0,
            }
        ],
        "roof": {
            "construction": 8,
            "insulation_location": 7,
        },
    }
    expected = {
        "reference": "1AB245CD",
        "address": "1, Fake Avenue, Random, AB24 5CD",
        "property_type": "House",
        "building_parts": [main_dwelling, extension],
    }

    # act
    parsed: SapPropertyDetails = parse_rdsap(SAMPLE_XML)

    # assert
    assert parsed == expected
+
+
+ND_THICKNESS_XML = """
+
+
+
+ 1
+ Somewhere
+ AB1 2CD
+
+
+
+
+
+ 0
+
+
+ Main Dwelling
+ 4
+ 2
+ ND
+
+
+ 10.0
+ 2.5
+ 50.0
+ 0
+ 0
+
+
+
+
+
+
+
+"""
+
+ND_INSULATION_LOCATION_XML = """
+
+
+
+ 1
+ Somewhere
+ AB1 2CD
+
+
+
+
+
+ 0
+
+
+ Main Dwelling
+ 4
+ ND
+ 250
+
+
+ 10.0
+ 2.5
+ 50.0
+ 0
+ 0
+
+
+
+
+
+
+
+"""
+
+
def test_parse_rdsap_nd_thickness():
    """An 'ND' Roof-Insulation-Thickness is kept as the string 'ND'."""
    # 'ND' (not determined) is a valid value in the wild for
    # Roof-Insulation-Thickness, so it should be retained as-is rather than
    # raising.

    # arrange
    expected_roof = {
        "construction": 4,
        "insulation_location": 2,
        "insulation_thickness_mm": "ND",
    }

    # act
    parsed: SapPropertyDetails = parse_rdsap(ND_THICKNESS_XML)
    roof = parsed["building_parts"][0]["roof"]

    # assert
    assert roof == expected_roof
+
+
def test_parse_rdsap_nd_location():
    """An 'ND' Roof-Insulation-Location is kept as the string 'ND'."""
    # 'ND' (not determined) is a valid value in the wild for
    # Roof-Insulation-Location, so it should be retained as-is rather than
    # raising.

    # arrange
    expected_roof = {
        "construction": 4,
        "insulation_location": "ND",
        "insulation_thickness_mm": 250,
    }

    # act
    parsed: SapPropertyDetails = parse_rdsap(ND_INSULATION_LOCATION_XML)
    roof = parsed["building_parts"][0]["roof"]

    # assert
    assert roof == expected_roof
+
+
def test_flatten_full():
    """Flattening SAMPLE_XML yields one prefixed key per floor and roof field."""
    # Two building parts: Main Dwelling has two floors + full roof,
    # Extension has one floor + partial roof (no thickness).

    # arrange
    details: SapPropertyDetails = parse_rdsap(SAMPLE_XML)
    expected_row = {
        "reference": "1AB245CD",
        "address": "1, Fake Avenue, Random, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
        "main_dwelling_floor_1_height_m": 2.46,
        "main_dwelling_floor_1_heat_loss_perimeter_m": 25.31,
        "main_dwelling_floor_1_party_wall_length_m": 0.0,
        "main_dwelling_floor_2_area_m2": 42.33,
        "main_dwelling_floor_2_height_m": 2.44,
        "main_dwelling_floor_2_heat_loss_perimeter_m": 26.16,
        "main_dwelling_floor_2_party_wall_length_m": 0.0,
        "main_dwelling_roof_construction": 4,
        "main_dwelling_roof_insulation_location": 2,
        "main_dwelling_roof_insulation_thickness_mm": 100.0,
        "extension_floor_1_area_m2": 4.46,
        "extension_floor_1_height_m": 2.24,
        "extension_floor_1_heat_loss_perimeter_m": 6.85,
        "extension_floor_1_party_wall_length_m": 0.0,
        "extension_roof_construction": 8,
        "extension_roof_insulation_location": 7,
    }

    # act
    flattened = flatten_sap_property(details)

    # assert
    assert flattened == expected_row
+
+
def test_flatten_no_roof():
    """A building part without roof data produces no roof keys at all."""
    # arrange
    details: SapPropertyDetails = parse_rdsap(NO_ROOF_XML)
    expected_row = {
        "reference": "5XY12AB",
        "address": "5, Somewhere, XY1 2AB",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 50.0,
        "main_dwelling_floor_1_height_m": 2.5,
        "main_dwelling_floor_1_heat_loss_perimeter_m": 10.0,
        "main_dwelling_floor_1_party_wall_length_m": 3.0,
    }

    # act
    flattened = flatten_sap_property(details)

    # assert
    assert flattened == expected_row
diff --git a/backend/ecmk_fetcher/upload.py b/backend/ecmk_fetcher/upload.py
index 0a744e53..8cb451b0 100644
--- a/backend/ecmk_fetcher/upload.py
+++ b/backend/ecmk_fetcher/upload.py
@@ -28,6 +28,18 @@ def upload_file_to_sharepoint(
)
def upload_excel_to_sharepoint(
    client: DomnaSharepointClient,
    file_path: str,
    sharepoint_path: str,
) -> None:
    """Upload the local file at ``file_path`` to ``sharepoint_path``.

    The upload is delegated to ``client.upload_file`` and the file keeps its
    local base name.
    """
    workbook_name = os.path.basename(file_path)
    client.upload_file(
        file_path=file_path,
        sharepoint_path=sharepoint_path,
        file_name=workbook_name,
    )
+
+
def upload_file_to_s3_and_update_db(
bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum
) -> None:
diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py
new file mode 100644
index 00000000..f993038b
--- /dev/null
+++ b/backend/ecmk_fetcher/xml_processor.py
@@ -0,0 +1,226 @@
+import xml.etree.ElementTree as ET
+from typing import Any, List, Optional, TypedDict
+
+
+from backend.ecmk_fetcher.reports import build_property_id
+from datatypes.epc.domain.field_mappings import PROPERTY_TYPE_LOOKUP
+
+
+# This file should ultimately live somewhere different, probably
class Floor(TypedDict):
    """Dimensions of a single floor, as read from one SAP-Floor-Dimension element."""

    area_m2: float  # from Total-Floor-Area
    height_m: float  # from Room-Height
    heat_loss_perimeter_m: float  # from Heat-Loss-Perimeter
    party_wall_length_m: float  # from Party-Wall-Length
+
+
+class Roof(TypedDict, total=False):
+ construction: int # TODO: map to str
+ insulation_location: int | str # TODO: map to str
+ insulation_thickness_mm: float | str
+
+
class BuildingPart(TypedDict):
    """One SAP-Building-Part: its identifier, its floors and its optional roof."""

    # e.g. "Main Dwelling", "Extension"
    identifier: str
    floors: list[Floor]
    # None when the building part carries no roof fields.
    roof: Optional[Roof]
+
+
class SapPropertyDetails(TypedDict):
    """Property-level result of parse_rdsap: reference, address, type and building parts."""

    reference: str
    address: str
    property_type: str
    building_parts: list[BuildingPart]
+
+
+def _get_namespace(tag: str) -> str:
+ return tag.split("}")[0].strip("{")
+
+
+def _require_text(value: Optional[str], field: str) -> str:
+ if value is None:
+ raise ValueError(f"Missing required field: {field}")
+ return value
+
+
+def _parse_float(value: Optional[str], field: str) -> float:
+ if value is None:
+ raise ValueError(f"Missing float field: {field}")
+ return float(value)
+
+
+def _parse_int(value: Optional[str], field: str) -> int:
+ if value is None:
+ raise ValueError(f"Missing int field: {field}")
+ return int(value)
+
+
+def _parse_thickness_mm(value: Optional[str]) -> Optional[float | str]:
+ if value is None:
+ return None
+ stripped = value.replace("mm", "").strip()
+ try:
+ return float(stripped)
+ except ValueError:
+ return stripped
+
+
def _parse_floor(floor_elem: ET.Element, ns: dict[str, str]) -> Floor:
    """Read one SAP-Floor-Dimension element into a Floor dict."""
    # Fields are parsed in a fixed order, so the first missing or invalid one
    # is the one reported.
    area = _parse_float(
        floor_elem.findtext("r:Total-Floor-Area", namespaces=ns),
        "Total-Floor-Area",
    )
    height = _parse_float(
        floor_elem.findtext("r:Room-Height", namespaces=ns),
        "Room-Height",
    )
    heat_loss = _parse_float(
        floor_elem.findtext("r:Heat-Loss-Perimeter", namespaces=ns),
        "Heat-Loss-Perimeter",
    )
    party_wall = _parse_float(
        floor_elem.findtext("r:Party-Wall-Length", namespaces=ns),
        "Party-Wall-Length",
    )

    floor: Floor = {
        "area_m2": area,
        "height_m": height,
        "heat_loss_perimeter_m": heat_loss,
        "party_wall_length_m": party_wall,
    }
    return floor


def _parse_roof(part_elem: ET.Element, ns: dict[str, str]) -> Optional[Roof]:
    """Read the roof fields of a building part; None when none of them is present."""
    construction_text = part_elem.findtext("r:Roof-Construction", namespaces=ns)
    location_text = part_elem.findtext("r:Roof-Insulation-Location", namespaces=ns)
    thickness_text = part_elem.findtext("r:Roof-Insulation-Thickness", namespaces=ns)

    if construction_text is None and location_text is None and thickness_text is None:
        return None

    roof: Roof = {}

    if construction_text is not None:
        roof["construction"] = _parse_int(construction_text, "Roof-Construction")

    if location_text is not None:
        try:
            roof["insulation_location"] = _parse_int(
                location_text, "Roof-Insulation-Location"
            )
        except ValueError:
            # Non-numeric codes (e.g. "ND") are kept as the raw text.
            roof["insulation_location"] = location_text

    thickness = _parse_thickness_mm(thickness_text)
    if thickness is not None:
        roof["insulation_thickness_mm"] = thickness

    return roof


def _parse_building_part(part_elem: ET.Element, ns: dict[str, str]) -> BuildingPart:
    """Read one SAP-Building-Part element: identifier, then floors, then roof."""
    identifier = _require_text(
        part_elem.findtext("r:Identifier", namespaces=ns), "Identifier"
    )
    floors: List[Floor] = [
        _parse_floor(floor_elem, ns)
        for floor_elem in part_elem.findall(".//r:SAP-Floor-Dimension", ns)
    ]
    roof = _parse_roof(part_elem, ns)

    building_part: BuildingPart = {
        "identifier": identifier,
        "floors": floors,
        "roof": roof,
    }
    return building_part


def parse_rdsap(xml_string: str) -> SapPropertyDetails:
    """Parse an RdSAP XML document into a SapPropertyDetails dict.

    The namespace of the root element is used for every lookup. The address
    is the comma-joined non-empty values of Address-Line-1, Address-Line-2,
    Post-Town and Postcode; the reference is built from Address-Line-1 and
    Postcode by ``build_property_id``. The Property-Type code is translated
    through ``PROPERTY_TYPE_LOOKUP``.

    Args:
        xml_string: The XML document text.

    Returns:
        The reference, address, property type and one entry per
        SAP-Building-Part found in the document.

    Raises:
        ValueError: If the Address element is missing, or a required field
            is missing or not numeric.
        KeyError: If the Property-Type code is not in ``PROPERTY_TYPE_LOOKUP``.
    """
    root = ET.fromstring(xml_string)
    ns: dict[str, str] = {"r": _get_namespace(root.tag)}

    # --- Address ---
    addr_elem = root.find(".//r:Address", ns)
    if addr_elem is None:
        raise ValueError("Address element not found")

    address_line_1: str = addr_elem.findtext(
        "r:Address-Line-1", default="", namespaces=ns
    )
    postcode: str = addr_elem.findtext("r:Postcode", default="", namespaces=ns)

    address_parts: List[str] = [
        address_line_1,
        addr_elem.findtext("r:Address-Line-2", default="", namespaces=ns),
        addr_elem.findtext("r:Post-Town", default="", namespaces=ns),
        postcode,
    ]
    # Empty parts are dropped so the joined address has no blank segments.
    address: str = ", ".join(part for part in address_parts if part)
    reference: str = build_property_id(address_line_1, postcode)

    # --- Property Type ---
    prop_type_code: int = _parse_int(
        root.findtext(".//r:Property-Type", namespaces=ns), "Property-Type"
    )
    property_type: str = PROPERTY_TYPE_LOOKUP[prop_type_code]

    # --- Building Parts ---
    building_parts: List[BuildingPart] = [
        _parse_building_part(part_elem, ns)
        for part_elem in root.findall(".//r:SAP-Building-Part", ns)
    ]

    result: SapPropertyDetails = {
        "reference": reference,
        "address": address,
        "property_type": property_type,
        "building_parts": building_parts,
    }
    return result
+
+
+def _normalise_identifier(identifier: str) -> str:
+ return identifier.lower().replace(" ", "_").replace("-", "_")
+
+
def flatten_sap_property(details: SapPropertyDetails) -> dict[str, Any]:
    """Flatten ``details`` into a single-level dict suitable for one sheet row.

    The row starts with ``reference``, ``address`` and ``property_type``.
    Each building part contributes keys prefixed with its normalised
    identifier: ``<prefix>_floor_<n>_<field>`` for every floor (n starts at
    1) and ``<prefix>_roof_<field>`` for each roof field that is present.

    If two building parts normalise to the same prefix, later ones get
    ``_2``, ``_3``, ... appended so their values do not overwrite the
    earlier part's columns.

    Args:
        details: Parsed property details.

    Returns:
        The flattened row, with keys in insertion order.
    """
    row: dict[str, Any] = {
        "reference": details["reference"],
        "address": details["address"],
        "property_type": details["property_type"],
    }

    used_prefixes: set[str] = set()

    for bp in details["building_parts"]:
        base_prefix = _normalise_identifier(bp["identifier"])

        # Pick the first free prefix. The loop also steps over a generated
        # name that happens to match another part's real identifier.
        prefix = base_prefix
        duplicate_number = 2
        while prefix in used_prefixes:
            prefix = f"{base_prefix}_{duplicate_number}"
            duplicate_number += 1
        used_prefixes.add(prefix)

        for i, floor in enumerate(bp["floors"], start=1):
            floor_prefix = f"{prefix}_floor_{i}"
            row[f"{floor_prefix}_area_m2"] = floor["area_m2"]
            row[f"{floor_prefix}_height_m"] = floor["height_m"]
            row[f"{floor_prefix}_heat_loss_perimeter_m"] = floor[
                "heat_loss_perimeter_m"
            ]
            row[f"{floor_prefix}_party_wall_length_m"] = floor["party_wall_length_m"]

        roof = bp.get("roof")
        if roof:
            # Roof keys are optional, so only the ones present are copied.
            for field in (
                "construction",
                "insulation_location",
                "insulation_thickness_mm",
            ):
                if field in roof:
                    row[f"{prefix}_roof_{field}"] = roof[field]

    return row
diff --git a/datatypes/epc/domain/field_mappings.py b/datatypes/epc/domain/field_mappings.py
new file mode 100644
index 00000000..cc0f9067
--- /dev/null
+++ b/datatypes/epc/domain/field_mappings.py
@@ -0,0 +1,3 @@
# Integer property-type code -> display label.
PROPERTY_TYPE_LOOKUP: dict[int, str] = {
    0: "House",
    1: "Bungalow",
    2: "Flat",
    3: "Maisonette",
}

# Placeholders: no codes are mapped yet.
ROOF_CONSTRUCTION_LOOKUP = {}
ROOF_INSULATION_LOCATION_LOOKUP = {}
diff --git a/pytest.ini b/pytest.ini
index 55c2873a..8f8ceeef 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,6 +3,6 @@ pythonpath = .
log_cli = true
log_cli_level = INFO
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
-testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests
+testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/
markers =
integration: mark a test as an integration test
diff --git a/utils/sharepoint/domna_sharepoint_client.py b/utils/sharepoint/domna_sharepoint_client.py
index 67e079ed..5e0255ac 100644
--- a/utils/sharepoint/domna_sharepoint_client.py
+++ b/utils/sharepoint/domna_sharepoint_client.py
@@ -90,6 +90,41 @@ class DomnaSharepointClient:
file_name, get_file_stream(file_path), sharepoint_path
)
+ def download_file(self, sharepoint_path: str, local_path: str) -> bool:
+ """
+ Download a file from SharePoint to a local path.
+
+ Returns True if the file was downloaded, False if it does not exist yet.
+ Raises on any other error.
+ """
+ sharepoint_client = SharePointClient(
+ tenant_id=self.sharepoint_tenant_id,
+ client_id=self.sharepoint_client_id,
+ client_secret=self.sharepoint_client_secret,
+ site_id=self.sharepoint_drive.value,
+ )
+
+ try:
+ metadata: Dict[str, Any] = sharepoint_client.get_file_metadata(sharepoint_path)
+ except ValueError:
+ return False
+
+ download_url: Optional[str] = metadata.get("@microsoft.graph.downloadUrl")
+ if not download_url:
+ return False
+
+ content: BytesIO = SharePointClient.download_sharepoint_file(download_url)
+
+ parent_dir = os.path.dirname(local_path)
+ if parent_dir:
+ os.makedirs(parent_dir, exist_ok=True)
+
+ with open(local_path, "wb") as f:
+ f.write(content.getvalue())
+
+ self.logger.debug(f"Downloaded SharePoint file to: {local_path}")
+ return True
+
def create_temp_file(self, content: BytesIO, path: str):
# Ensure the path is under /tmp/
new_path = os.path.join("/tmp/sharepoint", path)
diff --git a/utils/sharepoint/sharepoint_client.py b/utils/sharepoint/sharepoint_client.py
index 71f82b68..5807c3bd 100644
--- a/utils/sharepoint/sharepoint_client.py
+++ b/utils/sharepoint/sharepoint_client.py
@@ -278,6 +278,17 @@ class SharePointClient:
# logger.debug(f"Listing folder contents from URL: {url}")
return "GET", url, None
+ @api_call_decorator
+ def get_file_metadata(self, file_path: str) -> Dict[str, Any]:
+ """
+ GET /drives/{drive-id}/root:/{file_path}
+
+ Returns file metadata, including '@microsoft.graph.downloadUrl'.
+ Raises ValueError if the file does not exist (404).
+ """
+ url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{file_path}"
+ return "GET", url, None
+
@api_call_decorator
def create_folder(self, file_name: str, folder_path: str) -> Dict[str, Any]:
"""
@@ -325,7 +336,7 @@ class SharePointClient:
return self.upload_file(file_name, sharepoint_parent_id, file_stream)
@staticmethod
- def download_sharepoint_file(download_url):
+ def download_sharepoint_file(download_url: str) -> BytesIO:
"""
Downloads a file from the given URL and returns its content.