"""Parse an RdSAP survey XML document into a ``SapPropertyDetails`` dict."""

import xml.etree.ElementTree as ET
from typing import List, Optional, TypedDict

from etl.xml_survey_extraction.XmlParser import PROPERTY_TYPE_LOOKUP


# This file should ultimately live somewhere different, probably

# NOTE(review): the diff this module was reconstructed from elides most of the
# TypedDict bodies below. The fields listed are exactly the keys that
# ``parse_rdsap`` populates; confirm them (and ``total=False`` on ``Roof``,
# which is built up key by key) against the full file before merging.
class Floor(TypedDict):
    area_m2: float
    height_m: float
    heat_loss_perimeter_m: float
    party_wall_length_m: float


class Roof(TypedDict, total=False):
    construction: int
    insulation_location: int
    insulation_thickness_mm: float


class BuildingPart(TypedDict):
    identifier: str
    floors: List[Floor]
    roof: Optional[Roof]


class SapPropertyDetails(TypedDict):
    address: str
    property_type: str
    building_parts: List[BuildingPart]


# Child elements of <Address> that are joined, in this order, into the address.
_ADDRESS_FIELDS = ("Address-Line-1", "Address-Line-2", "Post-Town", "Postcode")


def _get_namespace(tag: str) -> str:
    """Return the namespace URI of an ElementTree tag, or ``""`` if it has none.

    ElementTree spells a qualified tag as ``{uri}local``. An unqualified tag
    has no braces, in which case there is no namespace to extract (the
    previous implementation returned the tag name itself, which made every
    subsequent lookup miss).
    """
    if not tag.startswith("{"):
        return ""
    return tag[1:].partition("}")[0]


def _optional_text(
    elem: ET.Element, path: str, ns: dict[str, str]
) -> Optional[str]:
    """Return the stripped text found at *path*, or ``None`` if missing/blank.

    ``Element.findtext`` returns ``""`` for an element that exists but has no
    text; treating that the same as "absent" keeps empty tags such as
    ``<Room-Height/>`` from reaching ``float()`` / ``int()``.
    """
    text = elem.findtext(path, namespaces=ns)
    if text is None:
        return None
    return text.strip() or None


def _require_text(value: Optional[str], field: str) -> str:
    """Return *value* stripped, raising ``ValueError`` if it is missing or blank."""
    if value is None or not value.strip():
        raise ValueError(f"Missing required field: {field}")
    return value.strip()


def _parse_float(value: Optional[str], field: str) -> float:
    """Convert *value* to ``float``; ``ValueError`` names *field* on failure."""
    if value is None:
        raise ValueError(f"Missing float field: {field}")
    try:
        return float(value)
    except ValueError as err:
        raise ValueError(f"Invalid float for field {field}: {value!r}") from err


def _parse_int(value: Optional[str], field: str) -> int:
    """Convert *value* to ``int``; ``ValueError`` names *field* on failure."""
    if value is None:
        raise ValueError(f"Missing int field: {field}")
    try:
        return int(value)
    except ValueError as err:
        raise ValueError(f"Invalid int for field {field}: {value!r}") from err


def _parse_thickness_mm(value: Optional[str]) -> Optional[float]:
    """Parse a thickness such as ``"100mm"`` into millimetres.

    Returns ``None`` when *value* is ``None`` or contains nothing besides the
    unit. Raises ``ValueError`` for any other non-numeric content.
    """
    if value is None:
        return None
    number = value.replace("mm", "").strip()
    if not number:
        return None
    # NOTE(review): survey data may carry non-numeric codes in this field;
    # those currently raise. Confirm whether they should map to None instead.
    try:
        return float(number)
    except ValueError as err:
        raise ValueError(f"Invalid thickness value: {value!r}") from err


def _parse_address(root: ET.Element, ns: dict[str, str]) -> str:
    """Join the non-empty address fields of the first ``Address`` element."""
    # NOTE(review): ``find`` returns the first match in document order;
    # confirm that the first <Address> is always the property's address.
    addr_elem = root.find(".//r:Address", ns)
    if addr_elem is None:
        raise ValueError("Address element not found")

    parts = (
        _optional_text(addr_elem, f"r:{name}", ns) for name in _ADDRESS_FIELDS
    )
    return ", ".join(part for part in parts if part)


def _parse_floor(floor_elem: ET.Element, ns: dict[str, str]) -> Floor:
    """Build a ``Floor`` from one ``SAP-Floor-Dimension`` element."""

    def required_float(name: str) -> float:
        return _parse_float(_optional_text(floor_elem, f"r:{name}", ns), name)

    return {
        "area_m2": required_float("Total-Floor-Area"),
        "height_m": required_float("Room-Height"),
        "heat_loss_perimeter_m": required_float("Heat-Loss-Perimeter"),
        "party_wall_length_m": required_float("Party-Wall-Length"),
    }


def _parse_roof(part_elem: ET.Element, ns: dict[str, str]) -> Optional[Roof]:
    """Build a ``Roof`` from a building part, or ``None`` if it has no roof data."""
    construction = _optional_text(part_elem, "r:Roof-Construction", ns)
    location = _optional_text(part_elem, "r:Roof-Insulation-Location", ns)
    thickness_text = _optional_text(part_elem, "r:Roof-Insulation-Thickness", ns)

    roof: Roof = {}
    if construction is not None:
        roof["construction"] = _parse_int(construction, "Roof-Construction")
    if location is not None:
        roof["insulation_location"] = _parse_int(
            location, "Roof-Insulation-Location"
        )
    thickness = _parse_thickness_mm(thickness_text)
    if thickness is not None:
        roof["insulation_thickness_mm"] = thickness

    # Only report a roof when at least one of its fields carried a value.
    return roof or None


def _parse_building_part(
    part_elem: ET.Element, ns: dict[str, str]
) -> BuildingPart:
    """Build a ``BuildingPart`` from one ``SAP-Building-Part`` element."""
    identifier = _require_text(
        _optional_text(part_elem, "r:Identifier", ns), "Identifier"
    )
    floors: List[Floor] = [
        _parse_floor(floor_elem, ns)
        for floor_elem in part_elem.findall(".//r:SAP-Floor-Dimension", ns)
    ]
    return {
        "identifier": identifier,
        "floors": floors,
        "roof": _parse_roof(part_elem, ns),
    }


def parse_rdsap(xml_string: str) -> SapPropertyDetails:
    """Extract address, property type and building parts from RdSAP XML.

    Args:
        xml_string: The complete XML document as text.

    Returns:
        A ``SapPropertyDetails`` dict with the joined address, the property
        type looked up from its numeric code, and one entry per
        ``SAP-Building-Part`` element (possibly none).

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_string* is not well-formed.
        ValueError: If a required element is missing, blank, or not numeric.
        KeyError: If the property-type code is not a key of
            ``PROPERTY_TYPE_LOOKUP``.
    """
    # NOTE(security): xml.etree.ElementTree is not hardened against
    # maliciously constructed XML (see "XML vulnerabilities" in the stdlib
    # docs). If these documents come from an untrusted source, switch to a
    # hardened parser.
    root = ET.fromstring(xml_string)

    # Every lookup uses the "r:" prefix bound to the root's namespace. For a
    # document without a namespace the URI is "", which ElementTree expands to
    # "{}tag" and (Python 3.8+) treats as the plain, un-namespaced "tag".
    ns: dict[str, str] = {"r": _get_namespace(root.tag)}

    address = _parse_address(root, ns)

    # Round-trip through int so codes such as " 2 " or "02" normalise to "2"
    # before the lookup.
    prop_type_code = str(
        _parse_int(
            _optional_text(root, ".//r:Property-Type", ns), "Property-Type"
        )
    )
    property_type: str = PROPERTY_TYPE_LOOKUP[prop_type_code]

    building_parts: List[BuildingPart] = [
        _parse_building_part(part_elem, ns)
        for part_elem in root.findall(".//r:SAP-Building-Part", ns)
    ]

    return {
        "address": address,
        "property_type": property_type,
        "building_parts": building_parts,
    }