From edb643eb653e82c0a9e7cad39bbd336210e31c11 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 14 Apr 2026 10:32:32 +0000 Subject: [PATCH 01/19] also get xml from ecmk but dont put in sharepoint --- backend/ecmk_fetcher/processor.py | 20 +++++++++++--------- backend/ecmk_fetcher/reports.py | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py index 2f122080..e32f8dd5 100644 --- a/backend/ecmk_fetcher/processor.py +++ b/backend/ecmk_fetcher/processor.py @@ -26,6 +26,7 @@ from backend.ecmk_fetcher.browser import ( ) from backend.ecmk_fetcher.reports import ( REPORT_TYPES, + FileDownloadButtonType, build_property_id, map_report_type_to_db_file_type, ) @@ -141,15 +142,16 @@ def run_job() -> None: ) try: - upload_file_to_sharepoint( - client=sharepoint_client, - file_path=file_path, - base_path=sharepoint_base_path, - subpath=sharepoint_address, - ) - logger.info( - f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}" - ) + if report_type != FileDownloadButtonType.RAW_XML.value: + upload_file_to_sharepoint( + client=sharepoint_client, + file_path=file_path, + base_path=sharepoint_base_path, + subpath=sharepoint_address, + ) + logger.info( + f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}" + ) # Upload to s3 and update db upload_file_to_s3_and_update_db( diff --git a/backend/ecmk_fetcher/reports.py b/backend/ecmk_fetcher/reports.py index d8d11d50..7ab4fc91 100644 --- a/backend/ecmk_fetcher/reports.py +++ b/backend/ecmk_fetcher/reports.py @@ -14,6 +14,7 @@ class FileDownloadButtonType(Enum): REPORT_TYPES = [ FileDownloadButtonType.ASSESSOR_HUB_SITENOTE_REPORT.value, FileDownloadButtonType.SITENOTE_REPORT.value, + FileDownloadButtonType.RAW_XML.value, ] From 437810bb4f1ead1cc2d2a0550eb3bcd444b70d74 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 14 Apr 2026 10:40:33 +0000 Subject: [PATCH 02/19] flip logic statement --- backend/ecmk_fetcher/processor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py index e32f8dd5..0a910b72 100644 --- a/backend/ecmk_fetcher/processor.py +++ b/backend/ecmk_fetcher/processor.py @@ -142,7 +142,10 @@ def run_job() -> None: ) try: - if report_type != FileDownloadButtonType.RAW_XML.value: + if report_type == FileDownloadButtonType.RAW_XML.value: + # TODO: extract data from XML and write to file + pass + else: upload_file_to_sharepoint( client=sharepoint_client, file_path=file_path, From 9ccbfc2d1151b66d52afa0eb00c36c568d645a2c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 14 Apr 2026 16:24:46 +0000 Subject: [PATCH 03/19] =?UTF-8?q?Extract=20data=20from=20xml=20?= =?UTF-8?q?=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ecmk_fetcher/tests/test_xml_processor.py | 74 +++++++++++++++++++ backend/ecmk_fetcher/xml_processor.py | 8 ++ datatypes/epc/domain/field_mappings.py | 1 + pytest.ini | 2 +- 4 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 backend/ecmk_fetcher/tests/test_xml_processor.py create mode 100644 backend/ecmk_fetcher/xml_processor.py create mode 100644 datatypes/epc/domain/field_mappings.py diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py new file mode 100644 index 00000000..10381198 --- /dev/null +++ b/backend/ecmk_fetcher/tests/test_xml_processor.py @@ -0,0 +1,74 @@ +from backend.ecmk_fetcher.xml_processor import parse_rdsap + + +SAMPLE_XML = """ + + +
+ 1 + Fake Avenue + Random + AB24 5CD +
+
+
+ + + + 0 + + + + Main Dwelling + + + + 25.31 + 2.46 + 43.61 + 0 + + + + 26.16 + 2.44 + 42.33 + 0 + + + + 4 + 2 + 100mm + + + + + +
+""" + + +def test_parse_rdsap_wide_flat_contract(): + # arrange + act + result = parse_rdsap(SAMPLE_XML) + + # assert + assert result == { + "address": "1, Fake Avenue, Random, AB24 5CD", + "property_type": "House", + # Main Dwelling - floor 0 + "main_dwelling_floor_index_0_area_m2": 43.61, + "main_dwelling_floor_index_0_height_m": 2.46, + "main_dwelling_floor_index_0_heat_loss_perimeter_m": 25.31, + "main_dwelling_floor_index_0_party_wall_length_m": 0.0, + # Main Dwelling - floor 1 + "main_dwelling_floor_index_1_area_m2": 42.33, + "main_dwelling_floor_index_1_height_m": 2.44, + "main_dwelling_floor_index_1_heat_loss_perimeter_m": 26.16, + "main_dwelling_floor_index_1_party_wall_length_m": 0.0, + # Roof (building-level, repeated across floors or stored once) + "main_dwelling_roof_construction": "4", + "main_dwelling_roof_insulation_location": "2", + "main_dwelling_roof_insulation_thickness": "100mm", + } diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py new file mode 100644 index 00000000..5a392d38 --- /dev/null +++ b/backend/ecmk_fetcher/xml_processor.py @@ -0,0 +1,8 @@ +from typing import Any + +from etl.xml_survey_extraction.XmlParser import PROPERTY_TYPE_LOOKUP + + +# This file should ultimately live somewhere different, probably +def parse_rdsap(xml_string: str) -> Any: # TODO: define shape of return object + raise NotImplementedError diff --git a/datatypes/epc/domain/field_mappings.py b/datatypes/epc/domain/field_mappings.py new file mode 100644 index 00000000..55156d3d --- /dev/null +++ b/datatypes/epc/domain/field_mappings.py @@ -0,0 +1 @@ +PROPERTY_TYPE_LOOKUP = {0: "House", 1: "Bungalow", 2: "Flat", 3: "Maisonette"} diff --git a/pytest.ini b/pytest.ini index 55c2873a..8f8ceeef 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,6 +3,6 @@ pythonpath = . log_cli = true log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ markers = integration: mark a test as an integration test From 648fff29afb18112da1448a3730467bf41484df3 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 14 Apr 2026 16:37:49 +0000 Subject: [PATCH 04/19] parse to strongly typed dict. will map to excel row later --- .../ecmk_fetcher/tests/test_xml_processor.py | 40 ++++++++++++------- backend/ecmk_fetcher/xml_processor.py | 31 +++++++++++++- 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py index 10381198..7e19181b 100644 --- a/backend/ecmk_fetcher/tests/test_xml_processor.py +++ b/backend/ecmk_fetcher/tests/test_xml_processor.py @@ -49,7 +49,7 @@ SAMPLE_XML = """ Any: # TODO: define shape of return object +def parse_rdsap( + xml_string: str, +) -> SapPropertyDetails: raise NotImplementedError From 50e2ef457e98e2eba35cb653d0ac3ad02526c893 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 14 Apr 2026 16:44:34 +0000 Subject: [PATCH 05/19] =?UTF-8?q?Extract=20data=20from=20xml=20?= =?UTF-8?q?=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/ecmk_fetcher/xml_processor.py | 149 ++++++++++++++++++++++++-- 1 file changed, 143 insertions(+), 6 deletions(-) diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py index 03f435f5..dd999095 100644 --- a/backend/ecmk_fetcher/xml_processor.py +++ b/backend/ecmk_fetcher/xml_processor.py @@ -1,8 +1,10 @@ -from typing import Any, List, Optional, TypedDict +import xml.etree.ElementTree as ET +from typing import List, Optional, TypedDict from etl.xml_survey_extraction.XmlParser import PROPERTY_TYPE_LOOKUP +# This file should ultimately live somewhere different, probably class Floor(TypedDict): area_m2: float height_m: float @@ -28,8 +30,143 @@ class SapPropertyDetails(TypedDict): building_parts: List[BuildingPart] -# This file should ultimately live somewhere different, probably -def parse_rdsap( - xml_string: str, -) -> SapPropertyDetails: - raise NotImplementedError +def _get_namespace(tag: str) -> str: + return tag.split("}")[0].strip("{") + + +def _require_text(value: Optional[str], field: str) -> str: + if value is None: + raise ValueError(f"Missing required field: {field}") + return value + + +def _parse_float(value: Optional[str], field: str) -> float: + if value is None: + raise ValueError(f"Missing float field: {field}") + return float(value) + + +def _parse_int(value: Optional[str], field: str) -> int: + if value is None: + raise ValueError(f"Missing int field: {field}") + return int(value) + + +def _parse_thickness_mm(value: Optional[str]) -> Optional[float]: + if value is None: + return None + return float(value.replace("mm", "").strip()) + + +def parse_rdsap(xml_string: str) -> SapPropertyDetails: + root = ET.fromstring(xml_string) + + ns_uri: str = _get_namespace(root.tag) + ns: dict[str, str] = {"r": ns_uri} + + # --- Address --- + addr_elem = root.find(".//r:Address", ns) + if addr_elem is None: + raise ValueError("Address element not found") + + address_parts: List[str] = [ + addr_elem.findtext("r:Address-Line-1", default="", namespaces=ns), + addr_elem.findtext("r:Address-Line-2", default="", namespaces=ns), + addr_elem.findtext("r:Post-Town", default="", namespaces=ns), + addr_elem.findtext("r:Postcode", default="", namespaces=ns), + ] + + address: str = ", ".join(part for part in address_parts if part) + + # --- Property Type --- + prop_type_text = root.findtext(".//r:Property-Type", namespaces=ns) + prop_type_code: str = str(_parse_int(prop_type_text, "Property-Type")) + property_type: str = PROPERTY_TYPE_LOOKUP[prop_type_code] + + # --- Building Parts --- + building_parts: List[BuildingPart] = [] + + for bp in root.findall(".//r:SAP-Building-Part", ns): + + identifier_text = bp.findtext("r:Identifier", namespaces=ns) + identifier: str = _require_text(identifier_text, "Identifier") + + # Floors + floors: List[Floor] = [] + + for f in bp.findall(".//r:SAP-Floor-Dimension", ns): + + area = _parse_float( + f.findtext("r:Total-Floor-Area", namespaces=ns), + "Total-Floor-Area", + ) + + height = _parse_float( + f.findtext("r:Room-Height", namespaces=ns), + "Room-Height", + ) + + heat_loss = _parse_float( + f.findtext("r:Heat-Loss-Perimeter", namespaces=ns), + "Heat-Loss-Perimeter", + ) + + party_wall = _parse_float( + f.findtext("r:Party-Wall-Length", namespaces=ns), + "Party-Wall-Length", + ) + + floor: Floor = { + "area_m2": area, + "height_m": height, + "heat_loss_perimeter_m": heat_loss, + "party_wall_length_m": party_wall, + } + + floors.append(floor) + + # Roof (optional) + roof: Optional[Roof] = None + + roof_construction_text = bp.findtext("r:Roof-Construction", namespaces=ns) + roof_ins_loc_text = bp.findtext("r:Roof-Insulation-Location", namespaces=ns) + roof_thickness_text = bp.findtext("r:Roof-Insulation-Thickness", namespaces=ns) + + if ( + roof_construction_text is not None + or roof_ins_loc_text is not None + or roof_thickness_text is not None + ): + roof_dict: Roof = {} + + if roof_construction_text is not None: + roof_dict["construction"] = _parse_int( + roof_construction_text, "Roof-Construction" + ) + + if roof_ins_loc_text is not None: + roof_dict["insulation_location"] = _parse_int( + roof_ins_loc_text, "Roof-Insulation-Location" + ) + + thickness = _parse_thickness_mm(roof_thickness_text) + if thickness is not None: + roof_dict["insulation_thickness_mm"] = thickness + + roof = roof_dict + + building_part: BuildingPart = { + "identifier": identifier, + "floors": floors, + "roof": roof, + } + + building_parts.append(building_part) + + result: SapPropertyDetails = { + "address": address, + "property_type": property_type, + "building_parts": building_parts, + } + + return result From 8f8f6f1a1a81f3e67d6717657900761161f5ae14 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 14 Apr 2026 16:49:09 +0000 Subject: [PATCH 06/19] =?UTF-8?q?More=20complicated=20XML=20test=20?= =?UTF-8?q?=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ecmk_fetcher/tests/test_xml_processor.py | 85 ++++++++++++++----- 1 file changed, 66 insertions(+), 19 deletions(-) diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py index 7e19181b..451c1e3f 100644 --- a/backend/ecmk_fetcher/tests/test_xml_processor.py +++ b/backend/ecmk_fetcher/tests/test_xml_processor.py @@ -1,4 +1,4 @@ -from backend.ecmk_fetcher.xml_processor import parse_rdsap +from backend.ecmk_fetcher.xml_processor import SapPropertyDetails, parse_rdsap SAMPLE_XML = """ @@ -18,29 +18,64 @@ SAMPLE_XML = """25.31 + 2.46 + 43.61 + 0 + 0 + + + + 26.16 + 2.44 + 42.33 + 1 + 0 + + + + + + 2 + Extension + C + + 8 + 7 + AB + + 3 + 4 + + + + 6.85 + 2.24 + 4.46 + 0 + 0 + + + + + @@ -51,7 +86,7 @@ SAMPLE_XML = """ Date: Tue, 14 Apr 2026 16:52:41 +0000 Subject: [PATCH 07/19] =?UTF-8?q?More=20complicated=20XML=20test=20?= =?UTF-8?q?=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/ecmk_fetcher/tests/test_xml_processor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py index 451c1e3f..9f28062d 100644 --- a/backend/ecmk_fetcher/tests/test_xml_processor.py +++ b/backend/ecmk_fetcher/tests/test_xml_processor.py @@ -125,7 +125,10 @@ def test_parse_rdsap_contract(): "party_wall_length_m": 0.0, } ], - "roof": None, + "roof": { + "construction": 8, + "insulation_location": 7, + }, }, ], } From fcceeb27360cbce6932158bc8327a76a880fe5e4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 08:21:14 +0000 Subject: [PATCH 08/19] =?UTF-8?q?flatten=20sap=20property=20to=20excel=20r?= =?UTF-8?q?ow=20object=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ecmk_fetcher/tests/test_xml_processor.py | 93 ++++++++++++++++++- backend/ecmk_fetcher/xml_processor.py | 6 +- 2 files changed, 97 insertions(+), 2 deletions(-) diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py index 9f28062d..801676ec 100644 --- a/backend/ecmk_fetcher/tests/test_xml_processor.py +++ b/backend/ecmk_fetcher/tests/test_xml_processor.py @@ -1,4 +1,8 @@ -from backend.ecmk_fetcher.xml_processor import SapPropertyDetails, parse_rdsap +from backend.ecmk_fetcher.xml_processor import ( + SapPropertyDetails, + flatten_sap_property, + parse_rdsap, +) SAMPLE_XML = """ @@ -84,6 +88,39 @@ SAMPLE_XML = """ + + +
+ 5 + Somewhere + XY1 2AB +
+
+
+ + + 0 + + + Main Dwelling + + + 10.0 + 2.5 + 50.0 + 0 + 3.0 + + + + + + +
+""" + + def test_parse_rdsap_contract(): # arrange + act result: SapPropertyDetails = parse_rdsap(SAMPLE_XML) @@ -132,3 +169,57 @@ def test_parse_rdsap_contract(): }, ], } + + +def test_flatten_full(): + # Two building parts; Main Dwelling has two floors + full roof, + # Extension has one floor + partial roof (no thickness) + + # arrange + details: SapPropertyDetails = parse_rdsap(SAMPLE_XML) + + # act + result = flatten_sap_property(details) + + # assert + assert result == { + "address": "1, Fake Avenue, Random, AB24 5CD", + "property_type": "House", + "main_dwelling_floor_1_area_m2": 43.61, + "main_dwelling_floor_1_height_m": 2.46, + "main_dwelling_floor_1_heat_loss_perimeter_m": 25.31, + "main_dwelling_floor_1_party_wall_length_m": 0.0, + "main_dwelling_floor_2_area_m2": 42.33, + "main_dwelling_floor_2_height_m": 2.44, + "main_dwelling_floor_2_heat_loss_perimeter_m": 26.16, + "main_dwelling_floor_2_party_wall_length_m": 0.0, + "main_dwelling_roof_construction": 4, + "main_dwelling_roof_insulation_location": 2, + "main_dwelling_roof_insulation_thickness_mm": 100.0, + "extension_floor_1_area_m2": 4.46, + "extension_floor_1_height_m": 2.24, + "extension_floor_1_heat_loss_perimeter_m": 6.85, + "extension_floor_1_party_wall_length_m": 0.0, + "extension_roof_construction": 8, + "extension_roof_insulation_location": 7, + } + + +def test_flatten_no_roof(): + # Single building part with no roof — roof keys must be absent entirely + + # arrange + details: SapPropertyDetails = parse_rdsap(NO_ROOF_XML) + + # act + result = flatten_sap_property(details) + + # assert + assert result == { + "address": "5, Somewhere, XY1 2AB", + "property_type": "House", + "main_dwelling_floor_1_area_m2": 50.0, + "main_dwelling_floor_1_height_m": 2.5, + "main_dwelling_floor_1_heat_loss_perimeter_m": 10.0, + "main_dwelling_floor_1_party_wall_length_m": 3.0, + } diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py index dd999095..38086fe8 100644 --- a/backend/ecmk_fetcher/xml_processor.py +++ b/backend/ecmk_fetcher/xml_processor.py @@ -1,5 +1,5 @@ import xml.etree.ElementTree as ET -from typing import List, Optional, TypedDict +from typing import Any, List, Optional, TypedDict from etl.xml_survey_extraction.XmlParser import PROPERTY_TYPE_LOOKUP @@ -170,3 +170,7 @@ def parse_rdsap(xml_string: str) -> SapPropertyDetails: } return result + + +def flatten_sap_property(details: SapPropertyDetails) -> dict[str, Any]: + raise NotImplementedError From 7904eca1fa95d2a7996e3b1c736f554065693609 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 08:23:59 +0000 Subject: [PATCH 09/19] =?UTF-8?q?flatten=20sap=20property=20to=20excel=20r?= =?UTF-8?q?ow=20object=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/ecmk_fetcher/xml_processor.py | 30 ++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py index 38086fe8..8c9d0d1e 100644 --- a/backend/ecmk_fetcher/xml_processor.py +++ b/backend/ecmk_fetcher/xml_processor.py @@ -172,5 +172,33 @@ def parse_rdsap(xml_string: str) -> SapPropertyDetails: return result +def _normalise_identifier(identifier: str) -> str: + return identifier.lower().replace(" ", "_").replace("-", "_") + + def flatten_sap_property(details: SapPropertyDetails) -> dict[str, Any]: - raise NotImplementedError + row: dict[str, Any] = {} + + row["address"] = details["address"] + row["property_type"] = details["property_type"] + + for bp in details["building_parts"]: + prefix = _normalise_identifier(bp["identifier"]) + + for i, floor in enumerate(bp["floors"], start=1): + floor_prefix = f"{prefix}_floor_{i}" + row[f"{floor_prefix}_area_m2"] = floor["area_m2"] + row[f"{floor_prefix}_height_m"] = floor["height_m"] + row[f"{floor_prefix}_heat_loss_perimeter_m"] = floor["heat_loss_perimeter_m"] + row[f"{floor_prefix}_party_wall_length_m"] = floor["party_wall_length_m"] + + roof = bp.get("roof") + if roof: + if "construction" in roof: + row[f"{prefix}_roof_construction"] = roof["construction"] + if "insulation_location" in roof: + row[f"{prefix}_roof_insulation_location"] = roof["insulation_location"] + if "insulation_thickness_mm" in roof: + row[f"{prefix}_roof_insulation_thickness_mm"] = roof["insulation_thickness_mm"] + + return row From 0b301c4473b015aba0b99d0629f852cf6bbaf5dd Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 08:35:03 +0000 Subject: [PATCH 10/19] =?UTF-8?q?write=20to=20excel=20file=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/ecmk_fetcher/excel_writer.py | 5 + .../ecmk_fetcher/tests/test_excel_writer.py | 123 ++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 backend/ecmk_fetcher/excel_writer.py create mode 100644 backend/ecmk_fetcher/tests/test_excel_writer.py diff --git a/backend/ecmk_fetcher/excel_writer.py b/backend/ecmk_fetcher/excel_writer.py new file mode 100644 index 00000000..089e0187 --- /dev/null +++ b/backend/ecmk_fetcher/excel_writer.py @@ -0,0 +1,5 @@ +from typing import Any + + +def write_row(file_path: str, row_data: dict[str, Any]) -> None: + raise NotImplementedError diff --git a/backend/ecmk_fetcher/tests/test_excel_writer.py b/backend/ecmk_fetcher/tests/test_excel_writer.py new file mode 100644 index 00000000..3f730951 --- /dev/null +++ b/backend/ecmk_fetcher/tests/test_excel_writer.py @@ -0,0 +1,123 @@ +import os +import pathlib +import pytest +from openpyxl import load_workbook +from openpyxl.worksheet.worksheet import Worksheet + +from backend.ecmk_fetcher.excel_writer import write_row + + +@pytest.fixture +def xlsx_path(tmp_path: pathlib.Path) -> str: + return str(tmp_path / "output.xlsx") + + +def _active_sheet(file_path: str) -> Worksheet: + ws = load_workbook(file_path).active + assert isinstance(ws, Worksheet) + return ws + + +def test_write_row_creates_file(xlsx_path: str): + # arrange + row = { + "address": "1 Fake Avenue, AB24 5CD", + "property_type": "House", + "main_dwelling_floor_1_area_m2": 43.61, + } + + # act + write_row(xlsx_path, row) + + # assert + assert os.path.exists(xlsx_path) + ws = _active_sheet(xlsx_path) + assert [c.value for c in ws[1]] == list(row.keys()) + assert [c.value for c in ws[2]] == list(row.values()) + + +def test_write_row_appends_to_existing(xlsx_path: str): + # arrange + row_a = { + "address": "1 Fake Avenue, AB24 5CD", + "property_type": "House", + "main_dwelling_floor_1_area_m2": 43.61, + } + row_b = { + "address": "2 Other Street, XY1 2AB", + "property_type": "Flat", + "main_dwelling_floor_1_area_m2": 30.0, + } + + # act + write_row(xlsx_path, row_a) + write_row(xlsx_path, row_b) + + # assert + ws = _active_sheet(xlsx_path) + assert ws.max_row == 3 # 1 header + 2 data rows + assert [c.value for c in ws[1]] == list(row_a.keys()) + assert [c.value for c in ws[2]] == list(row_a.values()) + assert [c.value for c in ws[3]] == list(row_b.values()) + + +def test_write_row_inserts_new_columns_at_logical_positions(xlsx_path: str): + # arrange + # First row: main_dwelling floor 1 + roof + # Second row: also has main_dwelling floor 2 — should be inserted between floor 1 and roof, + # not appended to the end + row_a = { + "address": "1 Fake Avenue, AB24 5CD", + "property_type": "House", + "main_dwelling_floor_1_area_m2": 43.61, + "main_dwelling_floor_1_height_m": 2.46, + "main_dwelling_roof_construction": 4, + } + row_b = { + "address": "2 Other Street, XY1 2AB", + "property_type": "House", + "main_dwelling_floor_1_area_m2": 50.0, + "main_dwelling_floor_1_height_m": 2.5, + "main_dwelling_floor_2_area_m2": 48.0, + "main_dwelling_floor_2_height_m": 2.4, + "main_dwelling_roof_construction": 4, + } + + # act + write_row(xlsx_path, row_a) + write_row(xlsx_path, row_b) + + # assert + ws = _active_sheet(xlsx_path) + + assert [c.value for c in ws[1]] == [ + "address", + "property_type", + "main_dwelling_floor_1_area_m2", + "main_dwelling_floor_1_height_m", + "main_dwelling_floor_2_area_m2", # inserted before roof, not at end + "main_dwelling_floor_2_height_m", + "main_dwelling_roof_construction", + ] + + # row_a had no floor_2 data — those cells should be empty + assert [c.value for c in ws[2]] == [ + "1 Fake Avenue, AB24 5CD", + "House", + 43.61, + 2.46, + None, # main_dwelling_floor_2_area_m2 + None, # main_dwelling_floor_2_height_m + 4, + ] + + # row_b should be fully populated + assert [c.value for c in ws[3]] == [ + "2 Other Street, XY1 2AB", + "House", + 50.0, + 2.5, + 48.0, + 2.4, + 4, + ] From e10e29b58103ae09f1e2adcf2d44c580776776fa Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 08:38:33 +0000 Subject: [PATCH 11/19] =?UTF-8?q?write=20to=20excel=20file=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/ecmk_fetcher/excel_writer.py | 49 +++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/backend/ecmk_fetcher/excel_writer.py b/backend/ecmk_fetcher/excel_writer.py index 089e0187..1e65cf33 100644 --- a/backend/ecmk_fetcher/excel_writer.py +++ b/backend/ecmk_fetcher/excel_writer.py @@ -1,5 +1,52 @@ +import os from typing import Any +from openpyxl import Workbook, load_workbook +from openpyxl.worksheet.worksheet import Worksheet + def write_row(file_path: str, row_data: dict[str, Any]) -> None: - raise NotImplementedError + new_keys = list(row_data.keys()) + + if not os.path.exists(file_path): + wb = Workbook() + ws: Worksheet = wb.active # type: ignore[assignment] + ws.append(new_keys) + ws.append(list(row_data.values())) + wb.save(file_path) + return + + wb = load_workbook(file_path) + ws = wb.active # type: ignore[assignment] + + # Build a mutable header list and insert new columns using insert_cols so + # that existing row data shifts along with the headers. + headers: list[str] = [cell.value for cell in ws[1]] # type: ignore[misc] + + for key in new_keys: + if key in headers: + continue + + # Find the first key that comes after this one in new_keys that already + # exists in headers — insert before it to keep columns logically grouped. + insert_before: str | None = None + found = False + for k in new_keys: + if k == key: + found = True + continue + if found and k in headers: + insert_before = k + break + + if insert_before is not None: + col_idx = headers.index(insert_before) + 1 # 1-based + ws.insert_cols(col_idx) + ws.cell(row=1, column=col_idx, value=key) + headers.insert(col_idx - 1, key) + else: + headers.append(key) + ws.cell(row=1, column=len(headers), value=key) + + ws.append([row_data.get(col) for col in headers]) + wb.save(file_path) From 42387da5a3f695f5639f86ca2c392138824f9ea8 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 11:46:29 +0000 Subject: [PATCH 12/19] =?UTF-8?q?include=20property=20reference=20in=20exc?= =?UTF-8?q?el=20row=20dict=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/ecmk_fetcher/tests/test_xml_processor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py index 801676ec..2a78cc7b 100644 --- a/backend/ecmk_fetcher/tests/test_xml_processor.py +++ b/backend/ecmk_fetcher/tests/test_xml_processor.py @@ -183,6 +183,7 @@ def test_flatten_full(): # assert assert result == { + "reference": "1AB245CD", "address": "1, Fake Avenue, Random, AB24 5CD", "property_type": "House", "main_dwelling_floor_1_area_m2": 43.61, @@ -216,6 +217,7 @@ def test_flatten_no_roof(): # assert assert result == { + "reference": "5XY12AB", "address": "5, Somewhere, XY1 2AB", "property_type": "House", "main_dwelling_floor_1_area_m2": 50.0, From ede2eb970b2c7410826df2717d6cf6c5dbc3bfde Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 11:53:28 +0000 Subject: [PATCH 13/19] =?UTF-8?q?include=20property=20reference=20in=20Sap?= =?UTF-8?q?PropertyDetails=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/ecmk_fetcher/tests/test_xml_processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py index 2a78cc7b..1a308177 100644 --- a/backend/ecmk_fetcher/tests/test_xml_processor.py +++ b/backend/ecmk_fetcher/tests/test_xml_processor.py @@ -127,6 +127,7 @@ def test_parse_rdsap_contract(): # assert assert result == { + "reference": "1AB245CD", "address": "1, Fake Avenue, Random, AB24 5CD", "property_type": "House", "building_parts": [ From 1153d19f0e7f98a97c1a8a916a404b83a8ceaa99 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 11:57:53 +0000 Subject: [PATCH 14/19] =?UTF-8?q?include=20property=20reference=20in=20Sap?= =?UTF-8?q?PropertyDetails=20and=20excel=20row=20dict=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/ecmk_fetcher/xml_processor.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py index 8c9d0d1e..a9f7490f 100644 --- a/backend/ecmk_fetcher/xml_processor.py +++ b/backend/ecmk_fetcher/xml_processor.py @@ -3,6 +3,8 @@ from typing import Any, List, Optional, TypedDict from etl.xml_survey_extraction.XmlParser import PROPERTY_TYPE_LOOKUP +from backend.ecmk_fetcher.reports import build_property_id + # This file should ultimately live somewhere different, probably class Floor(TypedDict): @@ -25,6 +27,7 @@ class BuildingPart(TypedDict): class SapPropertyDetails(TypedDict): + reference: str address: str property_type: str building_parts: List[BuildingPart] @@ -69,14 +72,18 @@ def parse_rdsap(xml_string: str) -> SapPropertyDetails: if addr_elem is None: raise ValueError("Address element not found") + address_line_1: str = addr_elem.findtext("r:Address-Line-1", default="", namespaces=ns) + postcode: str = addr_elem.findtext("r:Postcode", default="", namespaces=ns) + address_parts: List[str] = [ - addr_elem.findtext("r:Address-Line-1", default="", namespaces=ns), + address_line_1, addr_elem.findtext("r:Address-Line-2", default="", namespaces=ns), addr_elem.findtext("r:Post-Town", default="", namespaces=ns), - addr_elem.findtext("r:Postcode", default="", namespaces=ns), + postcode, ] address: str = ", ".join(part for part in address_parts if part) + reference: str = build_property_id(address_line_1, postcode) # --- Property Type --- prop_type_text = root.findtext(".//r:Property-Type", namespaces=ns) @@ -164,6 +171,7 @@ def parse_rdsap(xml_string: str) -> SapPropertyDetails: building_parts.append(building_part) result: SapPropertyDetails = { + "reference": reference, "address": address, "property_type": property_type, "building_parts": building_parts, @@ -179,6 +187,7 @@ def _normalise_identifier(identifier: str) -> str: def flatten_sap_property(details: SapPropertyDetails) -> dict[str, Any]: row: dict[str, Any] = {} + row["reference"] = details["reference"] row["address"] = details["address"] row["property_type"] = details["property_type"] @@ -189,7 +198,9 @@ def flatten_sap_property(details: SapPropertyDetails) -> dict[str, Any]: floor_prefix = f"{prefix}_floor_{i}" row[f"{floor_prefix}_area_m2"] = floor["area_m2"] row[f"{floor_prefix}_height_m"] = floor["height_m"] - row[f"{floor_prefix}_heat_loss_perimeter_m"] = floor["heat_loss_perimeter_m"] + row[f"{floor_prefix}_heat_loss_perimeter_m"] = floor[ + "heat_loss_perimeter_m" + ] row[f"{floor_prefix}_party_wall_length_m"] = floor["party_wall_length_m"] roof = bp.get("roof") @@ -199,6 +210,8 @@ def flatten_sap_property(details: SapPropertyDetails) -> dict[str, Any]: if "insulation_location" in roof: row[f"{prefix}_roof_insulation_location"] = roof["insulation_location"] if "insulation_thickness_mm" in roof: - row[f"{prefix}_roof_insulation_thickness_mm"] = roof["insulation_thickness_mm"] + row[f"{prefix}_roof_insulation_thickness_mm"] = roof[ + "insulation_thickness_mm" + ] return row From 155eab86e4114f25dbad588287519bc87c933edc Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 13:31:40 +0000 Subject: [PATCH 15/19] update Dimensions.xlsx with XML contents --- backend/app/db/models/uploaded_file.py | 1 + backend/ecmk_fetcher/excel_writer.py | 3 +- backend/ecmk_fetcher/processor.py | 27 ++++++++++++++-- backend/ecmk_fetcher/reports.py | 2 ++ backend/ecmk_fetcher/upload.py | 12 +++++++ utils/sharepoint/domna_sharepoint_client.py | 35 +++++++++++++++++++++ utils/sharepoint/sharepoint_client.py | 13 +++++++- 7 files changed, 89 insertions(+), 4 deletions(-) diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index 71763790..5b34a752 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -16,6 +16,7 @@ class FileTypeEnum(enum.Enum): PAS_2023_OCCUPANCY = "pas_2023_occupancy" ECMK_SITE_NOTE = "ecmk_site_note" ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note" + ECMK_SURVEY_XML = "ecmk_survey_xml" class FileSourceEnum(enum.Enum): diff --git a/backend/ecmk_fetcher/excel_writer.py b/backend/ecmk_fetcher/excel_writer.py index 1e65cf33..f290614b 100644 --- a/backend/ecmk_fetcher/excel_writer.py +++ b/backend/ecmk_fetcher/excel_writer.py @@ -21,7 +21,8 @@ def write_row(file_path: str, row_data: dict[str, Any]) -> None: # Build a mutable header list and insert new columns using insert_cols so # that existing row data shifts along with the headers. - headers: list[str] = [cell.value for cell in ws[1]] # type: ignore[misc] + # Filter out None to guard against blank columns in the source file. + headers: list[str] = [cell.value for cell in ws[1] if cell.value is not None] # type: ignore[misc] for key in new_keys: if key in headers: diff --git a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py index 0a910b72..4f8c24ea 100644 --- a/backend/ecmk_fetcher/processor.py +++ b/backend/ecmk_fetcher/processor.py @@ -30,10 +30,13 @@ from backend.ecmk_fetcher.reports import ( build_property_id, map_report_type_to_db_file_type, ) +from backend.ecmk_fetcher.excel_writer import write_row from backend.ecmk_fetcher.upload import ( + upload_excel_to_sharepoint, upload_file_to_s3_and_update_db, upload_file_to_sharepoint, ) +from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap from utils.logger import setup_logger from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient from utils.sharepoint.domna_sites import DomnaSites @@ -60,6 +63,15 @@ def run_job() -> None: ) sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments" + sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling" + + DIMENSIONS_FILENAME: str = "Dimensions.xlsx" + local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME) + + sharepoint_client.download_file( + sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}", + local_path=local_dimensions_path, + ) s3_bucket: str = "retrofit-energy-assessments-dev" @@ -143,8 +155,19 @@ def run_job() -> None: try: if report_type == FileDownloadButtonType.RAW_XML.value: - # TODO: extract data from XML and write to file - pass + with open(file_path, "r", encoding="utf-8") as f: + xml_string = f.read() + details = parse_rdsap(xml_string) + row_data = flatten_sap_property(details) + write_row(local_dimensions_path, row_data) + upload_excel_to_sharepoint( + client=sharepoint_client, + file_path=local_dimensions_path, + sharepoint_path=sharepoint_excel_path, + ) + logger.info( + f"Written dimensions row and uploaded Dimensions.xlsx for {address}" + ) else: upload_file_to_sharepoint( client=sharepoint_client, diff --git a/backend/ecmk_fetcher/reports.py b/backend/ecmk_fetcher/reports.py index 7ab4fc91..d2f8ea52 100644 --- a/backend/ecmk_fetcher/reports.py +++ b/backend/ecmk_fetcher/reports.py @@ -24,6 +24,8 @@ def map_report_type_to_db_file_type(report_type: int) -> FileTypeEnum: return FileTypeEnum.ECMK_SITE_NOTE case FileDownloadButtonType.SITENOTE_REPORT.value: return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE + case FileDownloadButtonType.RAW_XML.value: + return FileTypeEnum.ECMK_SURVEY_XML case _: raise ValueError("Unknown report type") diff --git a/backend/ecmk_fetcher/upload.py b/backend/ecmk_fetcher/upload.py index 0a744e53..8cb451b0 100644 --- a/backend/ecmk_fetcher/upload.py +++ b/backend/ecmk_fetcher/upload.py @@ -28,6 +28,18 @@ def upload_file_to_sharepoint( ) +def upload_excel_to_sharepoint( + client: DomnaSharepointClient, + file_path: str, + sharepoint_path: str, +) -> None: + client.upload_file( + file_path=file_path, + sharepoint_path=sharepoint_path, + file_name=os.path.basename(file_path), + ) + + def upload_file_to_s3_and_update_db( bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum ) -> None: diff --git a/utils/sharepoint/domna_sharepoint_client.py b/utils/sharepoint/domna_sharepoint_client.py index 67e079ed..5e0255ac 100644 --- a/utils/sharepoint/domna_sharepoint_client.py +++ b/utils/sharepoint/domna_sharepoint_client.py @@ -90,6 +90,41 @@ class DomnaSharepointClient: file_name, get_file_stream(file_path), sharepoint_path ) + def download_file(self, sharepoint_path: str, local_path: str) -> bool: + """ + Download a file from SharePoint to a local path. + + Returns True if the file was downloaded, False if it does not exist yet. + Raises on any other error. + """ + sharepoint_client = SharePointClient( + tenant_id=self.sharepoint_tenant_id, + client_id=self.sharepoint_client_id, + client_secret=self.sharepoint_client_secret, + site_id=self.sharepoint_drive.value, + ) + + try: + metadata: Dict[str, Any] = sharepoint_client.get_file_metadata(sharepoint_path) + except ValueError: + return False + + download_url: Optional[str] = metadata.get("@microsoft.graph.downloadUrl") + if not download_url: + return False + + content: BytesIO = SharePointClient.download_sharepoint_file(download_url) + + parent_dir = os.path.dirname(local_path) + if parent_dir: + os.makedirs(parent_dir, exist_ok=True) + + with open(local_path, "wb") as f: + f.write(content.getvalue()) + + self.logger.debug(f"Downloaded SharePoint file to: {local_path}") + return True + def create_temp_file(self, content: BytesIO, path: str): # Ensure the path is under /tmp/ new_path = os.path.join("/tmp/sharepoint", path) diff --git a/utils/sharepoint/sharepoint_client.py b/utils/sharepoint/sharepoint_client.py index 71f82b68..5807c3bd 100644 --- a/utils/sharepoint/sharepoint_client.py +++ b/utils/sharepoint/sharepoint_client.py @@ -278,6 +278,17 @@ class SharePointClient: # logger.debug(f"Listing folder contents from URL: {url}") return "GET", url, None + @api_call_decorator + def get_file_metadata(self, file_path: str) -> Dict[str, Any]: + """ + GET /drives/{drive-id}/root:/{file_path} + + Returns file metadata, including '@microsoft.graph.downloadUrl'. + Raises ValueError if the file does not exist (404). + """ + url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{file_path}" + return "GET", url, None + @api_call_decorator def create_folder(self, file_name: str, folder_path: str) -> Dict[str, Any]: """ @@ -325,7 +336,7 @@ class SharePointClient: return self.upload_file(file_name, sharepoint_parent_id, file_stream) @staticmethod - def download_sharepoint_file(download_url): + def download_sharepoint_file(download_url: str) -> BytesIO: """ Downloads a file from the given URL and returns its content. From 160dec11b188df92f54df15a748f7c6e102af9b4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 14:26:09 +0000 Subject: [PATCH 16/19] =?UTF-8?q?insulation=20thickness=20can=20be=20strin?= =?UTF-8?q?g=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ecmk_fetcher/tests/test_xml_processor.py | 51 +++++++++++++++++++ datatypes/epc/domain/field_mappings.py | 2 + 2 files changed, 53 insertions(+) diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py index 1a308177..3fd9e45e 100644 --- a/backend/ecmk_fetcher/tests/test_xml_processor.py +++ b/backend/ecmk_fetcher/tests/test_xml_processor.py @@ -172,6 +172,57 @@ def test_parse_rdsap_contract(): } +ND_THICKNESS_XML = """ + + +
+ 1 + Somewhere + AB1 2CD +
+
+
+ + + 0 + + + Main Dwelling + 4 + 2 + ND + + + 10.0 + 2.5 + 50.0 + 0 + 0 + + + + + + +
+""" + + +def test_parse_rdsap_nd_thickness(): + # 'ND' (not determined) is a valid value in the wild for Roof-Insulation-Thickness + # — it should be retained as-is rather than raising + + # arrange + act + result: SapPropertyDetails = parse_rdsap(ND_THICKNESS_XML) + + # assert + assert result["building_parts"][0]["roof"] == { + "construction": 4, + "insulation_location": 2, + "insulation_thickness_mm": "ND", + } + + def test_flatten_full(): # Two building parts; Main Dwelling has two floors + full roof, # Extension has one floor + partial roof (no thickness) diff --git a/datatypes/epc/domain/field_mappings.py b/datatypes/epc/domain/field_mappings.py index 55156d3d..cc0f9067 100644 --- a/datatypes/epc/domain/field_mappings.py +++ b/datatypes/epc/domain/field_mappings.py @@ -1 +1,3 @@ PROPERTY_TYPE_LOOKUP = {0: "House", 1: "Bungalow", 2: "Flat", 3: "Maisonette"} +ROOF_CONSTRUCTION_LOOKUP = {} +ROOF_INSULATION_LOCATION_LOOKUP = {} From 2dd9b9ce844d611a81d76d5d445b07b72af94042 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 14:29:03 +0000 Subject: [PATCH 17/19] =?UTF-8?q?insulation=20thickness=20can=20be=20strin?= =?UTF-8?q?g=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/ecmk_fetcher/xml_processor.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py index a9f7490f..b6dabbbe 100644 --- a/backend/ecmk_fetcher/xml_processor.py +++ b/backend/ecmk_fetcher/xml_processor.py @@ -17,7 +17,7 @@ class Floor(TypedDict): class Roof(TypedDict, total=False): construction: int # TODO: map to str insulation_location: int # TODO: map to str - insulation_thickness_mm: float + insulation_thickness_mm: float | str class BuildingPart(TypedDict): @@ -55,10 +55,14 @@ def _parse_int(value: Optional[str], field: str) -> int: return int(value) -def _parse_thickness_mm(value: Optional[str]) -> Optional[float]: +def _parse_thickness_mm(value: Optional[str]) -> Optional[float | str]: if value is None: return None - return float(value.replace("mm", "").strip()) + stripped = value.replace("mm", "").strip() + try: + return float(stripped) + except ValueError: + return stripped def parse_rdsap(xml_string: str) -> SapPropertyDetails: From 9e43b9d16eefe2407577eef444e2af1bc7e6c590 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 14:34:11 +0000 Subject: [PATCH 18/19] import correct PROPERTY_TYPE_LOOKUP --- backend/ecmk_fetcher/xml_processor.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py index b6dabbbe..aff284ef 100644 --- a/backend/ecmk_fetcher/xml_processor.py +++ b/backend/ecmk_fetcher/xml_processor.py @@ -1,9 +1,9 @@ import xml.etree.ElementTree as ET from typing import Any, List, Optional, TypedDict -from etl.xml_survey_extraction.XmlParser import PROPERTY_TYPE_LOOKUP from backend.ecmk_fetcher.reports import build_property_id +from datatypes.epc.domain.field_mappings import PROPERTY_TYPE_LOOKUP # This file should ultimately live somewhere different, probably @@ -76,7 +76,9 @@ def parse_rdsap(xml_string: str) -> SapPropertyDetails: if addr_elem is None: raise ValueError("Address element not found") - address_line_1: str = addr_elem.findtext("r:Address-Line-1", default="", namespaces=ns) + address_line_1: str = addr_elem.findtext( + "r:Address-Line-1", default="", namespaces=ns + ) postcode: str = addr_elem.findtext("r:Postcode", default="", namespaces=ns) address_parts: List[str] = [ @@ -91,7 +93,7 @@ def parse_rdsap(xml_string: str) -> SapPropertyDetails: # --- Property Type --- prop_type_text = root.findtext(".//r:Property-Type", namespaces=ns) - prop_type_code: str = str(_parse_int(prop_type_text, "Property-Type")) + prop_type_code: int = _parse_int(prop_type_text, "Property-Type") property_type: str = PROPERTY_TYPE_LOOKUP[prop_type_code] # --- Building Parts --- From 6333c36a298b4bb156bfc1c56d97fb5995e565e1 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 15 Apr 2026 14:59:43 +0000 Subject: [PATCH 19/19] =?UTF-8?q?insulation=20location=20can=20be=20string?= =?UTF-8?q?=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ecmk_fetcher/tests/test_xml_processor.py | 50 +++++++++++++++++++ backend/ecmk_fetcher/xml_processor.py | 11 ++-- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/backend/ecmk_fetcher/tests/test_xml_processor.py b/backend/ecmk_fetcher/tests/test_xml_processor.py index 3fd9e45e..3695b09d 100644 --- a/backend/ecmk_fetcher/tests/test_xml_processor.py +++ b/backend/ecmk_fetcher/tests/test_xml_processor.py @@ -207,6 +207,41 @@ ND_THICKNESS_XML = """ + + +
+ 1 + Somewhere + AB1 2CD +
+
+
+ + + 0 + + + Main Dwelling + 4 + ND + 250 + + + 10.0 + 2.5 + 50.0 + 0 + 0 + + + + + + +
+""" + def test_parse_rdsap_nd_thickness(): # 'ND' (not determined) is a valid value in the wild for Roof-Insulation-Thickness @@ -223,6 +258,21 @@ def test_parse_rdsap_nd_thickness(): } +def test_parse_rdsap_nd_location(): + # 'ND' (not determined) is a valid value in the wild for Roof-Insulation-Location + # — it should be retained as-is rather than raising + + # arrange + act + result: SapPropertyDetails = parse_rdsap(ND_INSULATION_LOCATION_XML) + + # assert + assert result["building_parts"][0]["roof"] == { + "construction": 4, + "insulation_location": "ND", + "insulation_thickness_mm": 250, + } + + def test_flatten_full(): # Two building parts; Main Dwelling has two floors + full roof, # Extension has one floor + partial roof (no thickness) diff --git a/backend/ecmk_fetcher/xml_processor.py b/backend/ecmk_fetcher/xml_processor.py index aff284ef..f993038b 100644 --- a/backend/ecmk_fetcher/xml_processor.py +++ b/backend/ecmk_fetcher/xml_processor.py @@ -16,7 +16,7 @@ class Floor(TypedDict): class Roof(TypedDict, total=False): construction: int # TODO: map to str - insulation_location: int # TODO: map to str + insulation_location: int | str # TODO: map to str insulation_thickness_mm: float | str @@ -158,9 +158,12 @@ def parse_rdsap(xml_string: str) -> SapPropertyDetails: ) if roof_ins_loc_text is not None: - roof_dict["insulation_location"] = _parse_int( - roof_ins_loc_text, "Roof-Insulation-Location" - ) + try: + roof_dict["insulation_location"] = _parse_int( + roof_ins_loc_text, "Roof-Insulation-Location" + ) + except ValueError: + roof_dict["insulation_location"] = roof_ins_loc_text thickness = _parse_thickness_mm(roof_thickness_text) if thickness is not None: