mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Extract data from xml 🟩
This commit is contained in:
parent
648fff29af
commit
50e2ef457e
1 changed files with 143 additions and 6 deletions
|
|
@ -1,8 +1,10 @@
|
|||
from typing import Any, List, Optional, TypedDict
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import List, Optional, TypedDict
|
||||
|
||||
from etl.xml_survey_extraction.XmlParser import PROPERTY_TYPE_LOOKUP
|
||||
|
||||
|
||||
# This file should ultimately live somewhere different, probably
|
||||
class Floor(TypedDict):
|
||||
area_m2: float
|
||||
height_m: float
|
||||
|
|
@ -28,8 +30,143 @@ class SapPropertyDetails(TypedDict):
|
|||
building_parts: List[BuildingPart]
|
||||
|
||||
|
||||
# This file should ultimately live somewhere different, probably
|
||||
def parse_rdsap(xml_string: str) -> SapPropertyDetails:
    """Placeholder for the RdSAP XML parser.

    Args:
        xml_string: The XML document as text (currently unused).

    Raises:
        NotImplementedError: Always; no parsing is performed here.
    """
    raise NotImplementedError()
|
||||
def _get_namespace(tag: str) -> str:
|
||||
return tag.split("}")[0].strip("{")
|
||||
|
||||
|
||||
def _require_text(value: Optional[str], field: str) -> str:
|
||||
if value is None:
|
||||
raise ValueError(f"Missing required field: {field}")
|
||||
return value
|
||||
|
||||
|
||||
def _parse_float(value: Optional[str], field: str) -> float:
|
||||
if value is None:
|
||||
raise ValueError(f"Missing float field: {field}")
|
||||
return float(value)
|
||||
|
||||
|
||||
def _parse_int(value: Optional[str], field: str) -> int:
|
||||
if value is None:
|
||||
raise ValueError(f"Missing int field: {field}")
|
||||
return int(value)
|
||||
|
||||
|
||||
def _parse_thickness_mm(value: Optional[str]) -> Optional[float]:
|
||||
if value is None:
|
||||
return None
|
||||
return float(value.replace("mm", "").strip())
|
||||
|
||||
|
||||
def parse_rdsap(xml_string: str) -> SapPropertyDetails:
    """Build a ``SapPropertyDetails`` dict from an RdSAP XML document.

    The namespace of the root element is bound to the ``r`` prefix and used
    for every lookup. The result holds the comma-joined address, the
    property type resolved through ``PROPERTY_TYPE_LOOKUP``, and one entry
    per ``SAP-Building-Part`` element (identifier, floors, optional roof).

    Args:
        xml_string: The XML document as text.

    Returns:
        A dict with ``address``, ``property_type`` and ``building_parts``.

    Raises:
        ValueError: If the ``Address`` element is missing, or a required
            field is absent or fails numeric conversion.
    """
    # NOTE(review): ElementTree is not hardened against malicious XML;
    # confirm that xml_string only ever comes from trusted sources.
    document = ET.fromstring(xml_string)
    namespaces: dict[str, str] = {"r": _get_namespace(document.tag)}

    # --- Address ---
    address_node = document.find(".//r:Address", namespaces)
    if address_node is None:
        raise ValueError("Address element not found")

    address_paths = (
        "r:Address-Line-1",
        "r:Address-Line-2",
        "r:Post-Town",
        "r:Postcode",
    )
    address_lines: List[str] = [
        address_node.findtext(path, default="", namespaces=namespaces)
        for path in address_paths
    ]
    # Empty components are dropped so the result has no dangling separators.
    address: str = ", ".join(filter(None, address_lines))

    # --- Property Type ---
    # Round-tripping through int normalises the code before it is used as a
    # lookup key.
    property_code: str = str(
        _parse_int(
            document.findtext(".//r:Property-Type", namespaces=namespaces),
            "Property-Type",
        )
    )
    property_type: str = PROPERTY_TYPE_LOOKUP[property_code]

    # --- Building Parts ---
    parts: List[BuildingPart] = []
    for part_node in document.findall(".//r:SAP-Building-Part", namespaces):
        identifier: str = _require_text(
            part_node.findtext("r:Identifier", namespaces=namespaces),
            "Identifier",
        )

        # Floors: all four measurements are required on every floor element.
        floors: List[Floor] = []
        for floor_node in part_node.findall(
            ".//r:SAP-Floor-Dimension", namespaces
        ):
            floor_entry: Floor = {
                "area_m2": _parse_float(
                    floor_node.findtext(
                        "r:Total-Floor-Area", namespaces=namespaces
                    ),
                    "Total-Floor-Area",
                ),
                "height_m": _parse_float(
                    floor_node.findtext("r:Room-Height", namespaces=namespaces),
                    "Room-Height",
                ),
                "heat_loss_perimeter_m": _parse_float(
                    floor_node.findtext(
                        "r:Heat-Loss-Perimeter", namespaces=namespaces
                    ),
                    "Heat-Loss-Perimeter",
                ),
                "party_wall_length_m": _parse_float(
                    floor_node.findtext(
                        "r:Party-Wall-Length", namespaces=namespaces
                    ),
                    "Party-Wall-Length",
                ),
            }
            floors.append(floor_entry)

        # Roof (optional): present only when at least one roof field exists,
        # and then carries only the fields that were actually found.
        construction_raw = part_node.findtext(
            "r:Roof-Construction", namespaces=namespaces
        )
        location_raw = part_node.findtext(
            "r:Roof-Insulation-Location", namespaces=namespaces
        )
        thickness_raw = part_node.findtext(
            "r:Roof-Insulation-Thickness", namespaces=namespaces
        )

        roof: Optional[Roof] = None
        if not (
            construction_raw is None
            and location_raw is None
            and thickness_raw is None
        ):
            roof_fields: Roof = {}

            if construction_raw is not None:
                roof_fields["construction"] = _parse_int(
                    construction_raw, "Roof-Construction"
                )

            if location_raw is not None:
                roof_fields["insulation_location"] = _parse_int(
                    location_raw, "Roof-Insulation-Location"
                )

            thickness_mm = _parse_thickness_mm(thickness_raw)
            if thickness_mm is not None:
                roof_fields["insulation_thickness_mm"] = thickness_mm

            roof = roof_fields

        part_entry: BuildingPart = {
            "identifier": identifier,
            "floors": floors,
            "roof": roof,
        }
        parts.append(part_entry)

    details: SapPropertyDetails = {
        "address": address,
        "property_type": property_type,
        "building_parts": parts,
    }
    return details
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue