Merge branch 'main' into caveman-claude

This commit is contained in:
Daniel Roth 2026-04-17 11:25:24 +00:00
commit 638471a5d6
12 changed files with 835 additions and 11 deletions

View file

@ -16,6 +16,7 @@ class FileTypeEnum(enum.Enum):
PAS_2023_OCCUPANCY = "pas_2023_occupancy"
ECMK_SITE_NOTE = "ecmk_site_note"
ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
ECMK_SURVEY_XML = "ecmk_survey_xml"
class FileSourceEnum(enum.Enum):

View file

@ -0,0 +1,53 @@
import os
from typing import Any
from openpyxl import Workbook, load_workbook
from openpyxl.worksheet.worksheet import Worksheet
def write_row(file_path: str, row_data: dict[str, Any]) -> None:
    """Append ``row_data`` as one row of the workbook at ``file_path``.

    If the file does not exist it is created with a header row (the keys of
    ``row_data``) followed by the data row. Otherwise the active sheet's first
    row is treated as the header: keys that are not yet present get a new
    column, inserted before the next key of ``row_data`` that already has a
    column (so related columns stay grouped), or appended at the end when no
    such key exists. The new row is then appended and the workbook saved.

    Args:
        file_path: Path of the ``.xlsx`` file to create or extend.
        row_data: Mapping of column header to cell value for the new row.
    """
    new_keys = list(row_data)

    if not os.path.exists(file_path):
        wb = Workbook()
        ws: Worksheet = wb.active  # type: ignore[assignment]
        ws.append(new_keys)
        ws.append(list(row_data.values()))
        wb.save(file_path)
        return

    wb = load_workbook(file_path)
    ws = wb.active  # type: ignore[assignment]

    # Keep exactly one entry per physical column so that list positions and
    # worksheet column numbers stay in step. Dropping blank header cells from
    # the middle would shift every later index and send both inserted columns
    # and the appended row to the wrong place. Only trailing blanks are
    # trimmed, so new columns are still appended directly after the last
    # named header.
    headers: list[Any] = [cell.value for cell in ws[1]]
    while headers and headers[-1] is None:
        headers.pop()

    for position, key in enumerate(new_keys):
        if key in headers:
            continue

        # First later key of this row that already has a column: insert in
        # front of it to keep columns logically grouped.
        insert_before = next(
            (later for later in new_keys[position + 1:] if later in headers),
            None,
        )

        if insert_before is None:
            headers.append(key)
            ws.cell(row=1, column=len(headers), value=key)
        else:
            col_idx = headers.index(insert_before) + 1  # 1-based
            # insert_cols shifts existing row data along with the headers.
            ws.insert_cols(col_idx)
            ws.cell(row=1, column=col_idx, value=key)
            headers.insert(col_idx - 1, key)

    # Columns with a blank header never receive a value from row_data.
    ws.append([None if col is None else row_data.get(col) for col in headers])
    wb.save(file_path)

View file

@ -26,13 +26,17 @@ from backend.ecmk_fetcher.browser import (
)
from backend.ecmk_fetcher.reports import (
REPORT_TYPES,
FileDownloadButtonType,
build_property_id,
map_report_type_to_db_file_type,
)
from backend.ecmk_fetcher.excel_writer import write_row
from backend.ecmk_fetcher.upload import (
upload_excel_to_sharepoint,
upload_file_to_s3_and_update_db,
upload_file_to_sharepoint,
)
from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
@ -59,6 +63,15 @@ def run_job() -> None:
)
sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling"
DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME)
sharepoint_client.download_file(
sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
local_path=local_dimensions_path,
)
s3_bucket: str = "retrofit-energy-assessments-dev"
@ -141,15 +154,30 @@ def run_job() -> None:
)
try:
upload_file_to_sharepoint(
client=sharepoint_client,
file_path=file_path,
base_path=sharepoint_base_path,
subpath=sharepoint_address,
)
logger.info(
f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
)
if report_type == FileDownloadButtonType.RAW_XML.value:
with open(file_path, "r", encoding="utf-8") as f:
xml_string = f.read()
details = parse_rdsap(xml_string)
row_data = flatten_sap_property(details)
write_row(local_dimensions_path, row_data)
upload_excel_to_sharepoint(
client=sharepoint_client,
file_path=local_dimensions_path,
sharepoint_path=sharepoint_excel_path,
)
logger.info(
f"Written dimensions row and uploaded Dimensions.xlsx for {address}"
)
else:
upload_file_to_sharepoint(
client=sharepoint_client,
file_path=file_path,
base_path=sharepoint_base_path,
subpath=sharepoint_address,
)
logger.info(
f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
)
# Upload to s3 and update db
upload_file_to_s3_and_update_db(

View file

@ -14,6 +14,7 @@ class FileDownloadButtonType(Enum):
REPORT_TYPES = [
FileDownloadButtonType.ASSESSOR_HUB_SITENOTE_REPORT.value,
FileDownloadButtonType.SITENOTE_REPORT.value,
FileDownloadButtonType.RAW_XML.value,
]
@ -23,6 +24,8 @@ def map_report_type_to_db_file_type(report_type: int) -> FileTypeEnum:
return FileTypeEnum.ECMK_SITE_NOTE
case FileDownloadButtonType.SITENOTE_REPORT.value:
return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE
case FileDownloadButtonType.RAW_XML.value:
return FileTypeEnum.ECMK_SURVEY_XML
case _:
raise ValueError("Unknown report type")

View file

@ -0,0 +1,123 @@
import os
import pathlib
import pytest
from openpyxl import load_workbook
from openpyxl.worksheet.worksheet import Worksheet
from backend.ecmk_fetcher.excel_writer import write_row
@pytest.fixture
def xlsx_path(tmp_path: pathlib.Path) -> str:
    """Return the path of a not-yet-created ``output.xlsx`` inside ``tmp_path``."""
    return str(tmp_path / "output.xlsx")
def _active_sheet(file_path: str) -> Worksheet:
    """Load the workbook at ``file_path`` and return its active worksheet."""
    ws = load_workbook(file_path).active
    # Narrow the type for the checker and fail loudly if the active sheet is
    # not a regular worksheet.
    assert isinstance(ws, Worksheet)
    return ws
def test_write_row_creates_file(xlsx_path: str):
    """A missing workbook is created with a header row followed by the data row."""
    # arrange
    row = {
        "address": "1 Fake Avenue, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
    }
    # act
    write_row(xlsx_path, row)
    # assert
    assert os.path.exists(xlsx_path)
    ws = _active_sheet(xlsx_path)
    assert [c.value for c in ws[1]] == list(row.keys())
    assert [c.value for c in ws[2]] == list(row.values())
def test_write_row_appends_to_existing(xlsx_path: str):
    """A second row with the same keys is appended without touching the header."""
    # arrange
    row_a = {
        "address": "1 Fake Avenue, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
    }
    row_b = {
        "address": "2 Other Street, XY1 2AB",
        "property_type": "Flat",
        "main_dwelling_floor_1_area_m2": 30.0,
    }
    # act
    write_row(xlsx_path, row_a)
    write_row(xlsx_path, row_b)
    # assert
    ws = _active_sheet(xlsx_path)
    assert ws.max_row == 3  # 1 header + 2 data rows
    assert [c.value for c in ws[1]] == list(row_a.keys())
    assert [c.value for c in ws[2]] == list(row_a.values())
    assert [c.value for c in ws[3]] == list(row_b.values())
def test_write_row_inserts_new_columns_at_logical_positions(xlsx_path: str):
    """New keys get columns inserted before the next known key, shifting old rows."""
    # arrange
    # First row: main_dwelling floor 1 + roof
    # Second row: also has main_dwelling floor 2 — should be inserted between floor 1 and roof,
    # not appended to the end
    row_a = {
        "address": "1 Fake Avenue, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
        "main_dwelling_floor_1_height_m": 2.46,
        "main_dwelling_roof_construction": 4,
    }
    row_b = {
        "address": "2 Other Street, XY1 2AB",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 50.0,
        "main_dwelling_floor_1_height_m": 2.5,
        "main_dwelling_floor_2_area_m2": 48.0,
        "main_dwelling_floor_2_height_m": 2.4,
        "main_dwelling_roof_construction": 4,
    }
    # act
    write_row(xlsx_path, row_a)
    write_row(xlsx_path, row_b)
    # assert
    ws = _active_sheet(xlsx_path)
    assert [c.value for c in ws[1]] == [
        "address",
        "property_type",
        "main_dwelling_floor_1_area_m2",
        "main_dwelling_floor_1_height_m",
        "main_dwelling_floor_2_area_m2",  # inserted before roof, not at end
        "main_dwelling_floor_2_height_m",
        "main_dwelling_roof_construction",
    ]
    # row_a had no floor_2 data — those cells should be empty
    assert [c.value for c in ws[2]] == [
        "1 Fake Avenue, AB24 5CD",
        "House",
        43.61,
        2.46,
        None,  # main_dwelling_floor_2_area_m2
        None,  # main_dwelling_floor_2_height_m
        4,
    ]
    # row_b should be fully populated
    assert [c.value for c in ws[3]] == [
        "2 Other Street, XY1 2AB",
        "House",
        50.0,
        2.5,
        48.0,
        2.4,
        4,
    ]

View file

@ -0,0 +1,329 @@
from backend.ecmk_fetcher.xml_processor import (
SapPropertyDetails,
flatten_sap_property,
parse_rdsap,
)
# Full fixture with two building parts: "Main Dwelling" (two floors, roof with
# construction, insulation location and thickness) and "Extension" (one floor,
# roof without a Roof-Insulation-Thickness element — it carries
# Sloping-Ceiling-Insulation-Thickness instead).
SAMPLE_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
<Report-Header>
<Property>
<Address>
<Address-Line-1>1</Address-Line-1>
<Address-Line-2>Fake Avenue</Address-Line-2>
<Post-Town>Random</Post-Town>
<Postcode>AB24 5CD</Postcode>
</Address>
</Property>
</Report-Header>
<SAP-Data>
<SAP-Property-Details>
<Property-Type>0</Property-Type>
<SAP-Building-Parts>
<SAP-Building-Part>
<Building-Part-Number>1</Building-Part-Number>
<Identifier>Main Dwelling</Identifier>
<Construction-Age-Band>C</Construction-Age-Band>
<Floor-Heat-Loss>7</Floor-Heat-Loss>
<Roof-Construction>4</Roof-Construction>
<Roof-Insulation-Location>2</Roof-Insulation-Location>
<Roof-Insulation-Thickness>100mm</Roof-Insulation-Thickness>
<Wall-Construction>4</Wall-Construction>
<Wall-Insulation-Type>4</Wall-Insulation-Type>
<SAP-Floor-Dimensions>
<SAP-Floor-Dimension>
<Heat-Loss-Perimeter quantity="metres">25.31</Heat-Loss-Perimeter>
<Room-Height quantity="metres">2.46</Room-Height>
<Total-Floor-Area quantity="square metres">43.61</Total-Floor-Area>
<Floor>0</Floor>
<Party-Wall-Length>0</Party-Wall-Length>
</SAP-Floor-Dimension>
<SAP-Floor-Dimension>
<Heat-Loss-Perimeter quantity="metres">26.16</Heat-Loss-Perimeter>
<Room-Height quantity="metres">2.44</Room-Height>
<Total-Floor-Area quantity="square metres">42.33</Total-Floor-Area>
<Floor>1</Floor>
<Party-Wall-Length>0</Party-Wall-Length>
</SAP-Floor-Dimension>
</SAP-Floor-Dimensions>
</SAP-Building-Part>
<SAP-Building-Part>
<Building-Part-Number>2</Building-Part-Number>
<Identifier>Extension</Identifier>
<Construction-Age-Band>C</Construction-Age-Band>
<Roof-Construction>8</Roof-Construction>
<Roof-Insulation-Location>7</Roof-Insulation-Location>
<Sloping-Ceiling-Insulation-Thickness>AB</Sloping-Ceiling-Insulation-Thickness>
<Wall-Construction>3</Wall-Construction>
<Wall-Insulation-Type>4</Wall-Insulation-Type>
<SAP-Floor-Dimensions>
<SAP-Floor-Dimension>
<Heat-Loss-Perimeter quantity="metres">6.85</Heat-Loss-Perimeter>
<Room-Height quantity="metres">2.24</Room-Height>
<Total-Floor-Area quantity="square metres">4.46</Total-Floor-Area>
<Floor>0</Floor>
<Party-Wall-Length>0</Party-Wall-Length>
</SAP-Floor-Dimension>
</SAP-Floor-Dimensions>
</SAP-Building-Part>
</SAP-Building-Parts>
</SAP-Property-Details>
</SAP-Data>
</RdSAP-Report>
"""
# Fixture with a single "Main Dwelling" part that has one floor and no Roof-*
# elements at all; the address also omits Address-Line-2.
NO_ROOF_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
<Report-Header>
<Property>
<Address>
<Address-Line-1>5</Address-Line-1>
<Post-Town>Somewhere</Post-Town>
<Postcode>XY1 2AB</Postcode>
</Address>
</Property>
</Report-Header>
<SAP-Data>
<SAP-Property-Details>
<Property-Type>0</Property-Type>
<SAP-Building-Parts>
<SAP-Building-Part>
<Identifier>Main Dwelling</Identifier>
<SAP-Floor-Dimensions>
<SAP-Floor-Dimension>
<Heat-Loss-Perimeter quantity="metres">10.0</Heat-Loss-Perimeter>
<Room-Height quantity="metres">2.5</Room-Height>
<Total-Floor-Area quantity="square metres">50.0</Total-Floor-Area>
<Floor>0</Floor>
<Party-Wall-Length>3.0</Party-Wall-Length>
</SAP-Floor-Dimension>
</SAP-Floor-Dimensions>
</SAP-Building-Part>
</SAP-Building-Parts>
</SAP-Property-Details>
</SAP-Data>
</RdSAP-Report>
"""
def test_parse_rdsap_contract():
    """Pin the full structure parse_rdsap returns for the two-part SAMPLE_XML."""
    # arrange + act
    result: SapPropertyDetails = parse_rdsap(SAMPLE_XML)
    # assert
    assert result == {
        "reference": "1AB245CD",
        "address": "1, Fake Avenue, Random, AB24 5CD",
        "property_type": "House",
        "building_parts": [
            {
                "identifier": "Main Dwelling",
                "floors": [
                    {
                        "area_m2": 43.61,
                        "height_m": 2.46,
                        "heat_loss_perimeter_m": 25.31,
                        "party_wall_length_m": 0.0,
                    },
                    {
                        "area_m2": 42.33,
                        "height_m": 2.44,
                        "heat_loss_perimeter_m": 26.16,
                        "party_wall_length_m": 0.0,
                    },
                ],
                "roof": {
                    "construction": 4,
                    "insulation_location": 2,
                    "insulation_thickness_mm": 100.0,
                },
            },
            {
                "identifier": "Extension",
                "floors": [
                    {
                        "area_m2": 4.46,
                        "height_m": 2.24,
                        "heat_loss_perimeter_m": 6.85,
                        "party_wall_length_m": 0.0,
                    }
                ],
                # No Roof-Insulation-Thickness in the fixture, so the key is absent.
                "roof": {
                    "construction": 8,
                    "insulation_location": 7,
                },
            },
        ],
    }
# Fixture whose Roof-Insulation-Thickness is the non-numeric code "ND".
ND_THICKNESS_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
<Report-Header>
<Property>
<Address>
<Address-Line-1>1</Address-Line-1>
<Post-Town>Somewhere</Post-Town>
<Postcode>AB1 2CD</Postcode>
</Address>
</Property>
</Report-Header>
<SAP-Data>
<SAP-Property-Details>
<Property-Type>0</Property-Type>
<SAP-Building-Parts>
<SAP-Building-Part>
<Identifier>Main Dwelling</Identifier>
<Roof-Construction>4</Roof-Construction>
<Roof-Insulation-Location>2</Roof-Insulation-Location>
<Roof-Insulation-Thickness>ND</Roof-Insulation-Thickness>
<SAP-Floor-Dimensions>
<SAP-Floor-Dimension>
<Heat-Loss-Perimeter quantity="metres">10.0</Heat-Loss-Perimeter>
<Room-Height quantity="metres">2.5</Room-Height>
<Total-Floor-Area quantity="square metres">50.0</Total-Floor-Area>
<Floor>0</Floor>
<Party-Wall-Length>0</Party-Wall-Length>
</SAP-Floor-Dimension>
</SAP-Floor-Dimensions>
</SAP-Building-Part>
</SAP-Building-Parts>
</SAP-Property-Details>
</SAP-Data>
</RdSAP-Report>
"""
# Fixture whose Roof-Insulation-Location is the non-numeric code "ND" and
# whose thickness is a bare number without the "mm" suffix.
ND_INSULATION_LOCATION_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
<Report-Header>
<Property>
<Address>
<Address-Line-1>1</Address-Line-1>
<Post-Town>Somewhere</Post-Town>
<Postcode>AB1 2CD</Postcode>
</Address>
</Property>
</Report-Header>
<SAP-Data>
<SAP-Property-Details>
<Property-Type>0</Property-Type>
<SAP-Building-Parts>
<SAP-Building-Part>
<Identifier>Main Dwelling</Identifier>
<Roof-Construction>4</Roof-Construction>
<Roof-Insulation-Location>ND</Roof-Insulation-Location>
<Roof-Insulation-Thickness>250</Roof-Insulation-Thickness>
<SAP-Floor-Dimensions>
<SAP-Floor-Dimension>
<Heat-Loss-Perimeter quantity="metres">10.0</Heat-Loss-Perimeter>
<Room-Height quantity="metres">2.5</Room-Height>
<Total-Floor-Area quantity="square metres">50.0</Total-Floor-Area>
<Floor>0</Floor>
<Party-Wall-Length>0</Party-Wall-Length>
</SAP-Floor-Dimension>
</SAP-Floor-Dimensions>
</SAP-Building-Part>
</SAP-Building-Parts>
</SAP-Property-Details>
</SAP-Data>
</RdSAP-Report>
"""
def test_parse_rdsap_nd_thickness():
    """A non-numeric roof insulation thickness is kept as the raw string."""
    # 'ND' (not determined) is a valid value in the wild for Roof-Insulation-Thickness
    # — it should be retained as-is rather than raising
    # arrange + act
    result: SapPropertyDetails = parse_rdsap(ND_THICKNESS_XML)
    # assert
    assert result["building_parts"][0]["roof"] == {
        "construction": 4,
        "insulation_location": 2,
        "insulation_thickness_mm": "ND",
    }
def test_parse_rdsap_nd_location():
    """A non-numeric roof insulation location is kept as the raw string."""
    # 'ND' (not determined) is a valid value in the wild for Roof-Insulation-Location
    # — it should be retained as-is rather than raising
    # arrange + act
    result: SapPropertyDetails = parse_rdsap(ND_INSULATION_LOCATION_XML)
    # assert
    assert result["building_parts"][0]["roof"] == {
        "construction": 4,
        "insulation_location": "ND",
        "insulation_thickness_mm": 250,
    }
def test_flatten_full():
    """Flattening SAMPLE_XML yields one prefixed key per floor and roof field."""
    # Two building parts; Main Dwelling has two floors + full roof,
    # Extension has one floor + partial roof (no thickness)
    # arrange
    details: SapPropertyDetails = parse_rdsap(SAMPLE_XML)
    # act
    result = flatten_sap_property(details)
    # assert
    assert result == {
        "reference": "1AB245CD",
        "address": "1, Fake Avenue, Random, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
        "main_dwelling_floor_1_height_m": 2.46,
        "main_dwelling_floor_1_heat_loss_perimeter_m": 25.31,
        "main_dwelling_floor_1_party_wall_length_m": 0.0,
        "main_dwelling_floor_2_area_m2": 42.33,
        "main_dwelling_floor_2_height_m": 2.44,
        "main_dwelling_floor_2_heat_loss_perimeter_m": 26.16,
        "main_dwelling_floor_2_party_wall_length_m": 0.0,
        "main_dwelling_roof_construction": 4,
        "main_dwelling_roof_insulation_location": 2,
        "main_dwelling_roof_insulation_thickness_mm": 100.0,
        "extension_floor_1_area_m2": 4.46,
        "extension_floor_1_height_m": 2.24,
        "extension_floor_1_heat_loss_perimeter_m": 6.85,
        "extension_floor_1_party_wall_length_m": 0.0,
        "extension_roof_construction": 8,
        "extension_roof_insulation_location": 7,
    }
def test_flatten_no_roof():
    """A building part without roof data contributes no roof keys to the row."""
    # Single building part with no roof — roof keys must be absent entirely
    # arrange
    details: SapPropertyDetails = parse_rdsap(NO_ROOF_XML)
    # act
    result = flatten_sap_property(details)
    # assert
    assert result == {
        "reference": "5XY12AB",
        "address": "5, Somewhere, XY1 2AB",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 50.0,
        "main_dwelling_floor_1_height_m": 2.5,
        "main_dwelling_floor_1_heat_loss_perimeter_m": 10.0,
        "main_dwelling_floor_1_party_wall_length_m": 3.0,
    }

View file

@ -28,6 +28,18 @@ def upload_file_to_sharepoint(
)
def upload_excel_to_sharepoint(
    client: DomnaSharepointClient,
    file_path: str,
    sharepoint_path: str,
) -> None:
    """Upload a local file through ``client.upload_file``.

    The SharePoint file name is the basename of ``file_path``;
    ``sharepoint_path`` is passed through unchanged.
    """
    workbook_name = os.path.basename(file_path)
    client.upload_file(
        file_path=file_path,
        sharepoint_path=sharepoint_path,
        file_name=workbook_name,
    )
def upload_file_to_s3_and_update_db(
bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum
) -> None:

View file

@ -0,0 +1,226 @@
import xml.etree.ElementTree as ET
from typing import Any, List, Optional, TypedDict
from backend.ecmk_fetcher.reports import build_property_id
from datatypes.epc.domain.field_mappings import PROPERTY_TYPE_LOOKUP
# This file should ultimately live somewhere different, probably
class Floor(TypedDict):
    """Dimensions of one ``SAP-Floor-Dimension`` element; all keys are required."""

    area_m2: float  # parsed from Total-Floor-Area
    height_m: float  # parsed from Room-Height
    heat_loss_perimeter_m: float  # parsed from Heat-Loss-Perimeter
    party_wall_length_m: float  # parsed from Party-Wall-Length
class Roof(TypedDict, total=False):
    """Roof attributes of a building part; every key is optional (``total=False``)."""

    construction: int  # TODO: map to str
    # Integer code when the XML text is numeric, otherwise the raw text (e.g. "ND").
    insulation_location: int | str  # TODO: map to str
    # Float when the text (minus any "mm") is numeric, otherwise the stripped text.
    insulation_thickness_mm: float | str
class BuildingPart(TypedDict):
    """One ``SAP-Building-Part``: its identifier, its floors and optional roof data."""

    identifier: str  # e.g. "Main Dwelling", "Extension"
    floors: List[Floor]
    roof: Optional[Roof]  # None when the part has no Roof-* elements
class SapPropertyDetails(TypedDict):
    """Parsed result of an RdSAP report, as returned by ``parse_rdsap``."""

    reference: str  # built by build_property_id from address line 1 and postcode
    address: str  # non-empty address components joined with ", "
    property_type: str  # label looked up in PROPERTY_TYPE_LOOKUP
    building_parts: List[BuildingPart]
def _get_namespace(tag: str) -> str:
return tag.split("}")[0].strip("{")
def _require_text(value: Optional[str], field: str) -> str:
if value is None:
raise ValueError(f"Missing required field: {field}")
return value
def _parse_float(value: Optional[str], field: str) -> float:
if value is None:
raise ValueError(f"Missing float field: {field}")
return float(value)
def _parse_int(value: Optional[str], field: str) -> int:
if value is None:
raise ValueError(f"Missing int field: {field}")
return int(value)
def _parse_thickness_mm(value: Optional[str]) -> Optional[float | str]:
if value is None:
return None
stripped = value.replace("mm", "").strip()
try:
return float(stripped)
except ValueError:
return stripped
def parse_rdsap(xml_string: str) -> SapPropertyDetails:
    """Parse an RdSAP report XML string into a ``SapPropertyDetails`` dict.

    The namespace used for every lookup is taken from the root element's tag.
    The first ``Address`` element supplies the address and the property
    reference; every ``SAP-Building-Part`` contributes its identifier, its
    ``SAP-Floor-Dimension`` entries and, when any ``Roof-*`` element is
    present, a roof dict containing only the keys that were found.

    Raises:
        ValueError: if no ``Address`` element exists, a required element is
            missing, or a numeric field (other than the roof insulation
            location and thickness, which fall back to text) is not numeric.
        KeyError: if the ``Property-Type`` code is not in
            ``PROPERTY_TYPE_LOOKUP``.
    """
    root = ET.fromstring(xml_string)
    ns: dict[str, str] = {"r": _get_namespace(root.tag)}

    # --- Address ---
    address_node = root.find(".//r:Address", ns)
    if address_node is None:
        raise ValueError("Address element not found")

    line_1: str = address_node.findtext("r:Address-Line-1", default="", namespaces=ns)
    line_2: str = address_node.findtext("r:Address-Line-2", default="", namespaces=ns)
    town: str = address_node.findtext("r:Post-Town", default="", namespaces=ns)
    postcode: str = address_node.findtext("r:Postcode", default="", namespaces=ns)

    # Empty components are dropped so the separator never doubles up.
    address: str = ", ".join(filter(None, (line_1, line_2, town, postcode)))
    reference: str = build_property_id(line_1, postcode)

    # --- Property Type ---
    type_code: int = _parse_int(
        root.findtext(".//r:Property-Type", namespaces=ns), "Property-Type"
    )
    property_type: str = PROPERTY_TYPE_LOOKUP[type_code]

    # --- Building Parts ---
    building_parts: List[BuildingPart] = []
    for part in root.findall(".//r:SAP-Building-Part", ns):
        identifier: str = _require_text(
            part.findtext("r:Identifier", namespaces=ns), "Identifier"
        )

        # Floors: all four measurements are mandatory for every dimension.
        floors: List[Floor] = [
            {
                "area_m2": _parse_float(
                    dim.findtext("r:Total-Floor-Area", namespaces=ns),
                    "Total-Floor-Area",
                ),
                "height_m": _parse_float(
                    dim.findtext("r:Room-Height", namespaces=ns),
                    "Room-Height",
                ),
                "heat_loss_perimeter_m": _parse_float(
                    dim.findtext("r:Heat-Loss-Perimeter", namespaces=ns),
                    "Heat-Loss-Perimeter",
                ),
                "party_wall_length_m": _parse_float(
                    dim.findtext("r:Party-Wall-Length", namespaces=ns),
                    "Party-Wall-Length",
                ),
            }
            for dim in part.findall(".//r:SAP-Floor-Dimension", ns)
        ]

        # Roof (optional): stays None unless at least one roof element exists.
        construction_text = part.findtext("r:Roof-Construction", namespaces=ns)
        location_text = part.findtext("r:Roof-Insulation-Location", namespaces=ns)
        thickness_text = part.findtext("r:Roof-Insulation-Thickness", namespaces=ns)

        roof: Optional[Roof] = None
        has_roof_data = any(
            text is not None
            for text in (construction_text, location_text, thickness_text)
        )
        if has_roof_data:
            roof_fields: Roof = {}
            if construction_text is not None:
                roof_fields["construction"] = _parse_int(
                    construction_text, "Roof-Construction"
                )
            if location_text is not None:
                try:
                    roof_fields["insulation_location"] = _parse_int(
                        location_text, "Roof-Insulation-Location"
                    )
                except ValueError:
                    # Non-numeric codes are retained as text.
                    roof_fields["insulation_location"] = location_text
            thickness = _parse_thickness_mm(thickness_text)
            if thickness is not None:
                roof_fields["insulation_thickness_mm"] = thickness
            roof = roof_fields

        building_parts.append(
            {
                "identifier": identifier,
                "floors": floors,
                "roof": roof,
            }
        )

    return {
        "reference": reference,
        "address": address,
        "property_type": property_type,
        "building_parts": building_parts,
    }
def _normalise_identifier(identifier: str) -> str:
return identifier.lower().replace(" ", "_").replace("-", "_")
def flatten_sap_property(details: SapPropertyDetails) -> dict[str, Any]:
    """Flatten parsed property details into a single-level row dict.

    The row starts with ``reference``, ``address`` and ``property_type``.
    Each building part then adds ``<part>_floor_<n>_<field>`` keys (floors
    numbered from 1) and, for a non-empty roof, ``<part>_roof_<field>`` keys
    for the roof fields that are present. ``<part>`` is the normalised part
    identifier. Key insertion order follows the order described here.
    """
    row: dict[str, Any] = {
        "reference": details["reference"],
        "address": details["address"],
        "property_type": details["property_type"],
    }

    for part in details["building_parts"]:
        prefix = _normalise_identifier(part["identifier"])

        for number, floor in enumerate(part["floors"], start=1):
            stem = f"{prefix}_floor_{number}"
            row.update(
                {
                    f"{stem}_area_m2": floor["area_m2"],
                    f"{stem}_height_m": floor["height_m"],
                    f"{stem}_heat_loss_perimeter_m": floor["heat_loss_perimeter_m"],
                    f"{stem}_party_wall_length_m": floor["party_wall_length_m"],
                }
            )

        roof = part.get("roof")
        if not roof:
            # Missing, None or empty roof: contribute no roof keys at all.
            continue
        if "construction" in roof:
            row[f"{prefix}_roof_construction"] = roof["construction"]
        if "insulation_location" in roof:
            row[f"{prefix}_roof_insulation_location"] = roof["insulation_location"]
        if "insulation_thickness_mm" in roof:
            row[f"{prefix}_roof_insulation_thickness_mm"] = roof[
                "insulation_thickness_mm"
            ]

    return row

View file

@ -0,0 +1,3 @@
# Integer property-type code -> human-readable label.
PROPERTY_TYPE_LOOKUP = {0: "House", 1: "Bungalow", 2: "Flat", 3: "Maisonette"}
# Placeholders: both roof lookups are currently empty.
ROOF_CONSTRUCTION_LOOKUP = {}
ROOF_INSULATION_LOCATION_LOOKUP = {}

View file

@ -3,6 +3,6 @@ pythonpath = .
log_cli = true
log_cli_level = INFO
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/
markers =
integration: mark a test as an integration test

View file

@ -90,6 +90,41 @@ class DomnaSharepointClient:
file_name, get_file_stream(file_path), sharepoint_path
)
def download_file(self, sharepoint_path: str, local_path: str) -> bool:
    """
    Fetch a SharePoint file and write it to ``local_path``.

    Returns True once the file has been written. Returns False when the
    metadata lookup raises ValueError (the file does not exist yet) or the
    metadata carries no download URL. Any other error propagates.
    """
    graph_client = SharePointClient(
        tenant_id=self.sharepoint_tenant_id,
        client_id=self.sharepoint_client_id,
        client_secret=self.sharepoint_client_secret,
        site_id=self.sharepoint_drive.value,
    )

    try:
        file_info: Dict[str, Any] = graph_client.get_file_metadata(sharepoint_path)
    except ValueError:
        return False

    source_url: Optional[str] = file_info.get("@microsoft.graph.downloadUrl")
    if not source_url:
        return False

    payload: BytesIO = SharePointClient.download_sharepoint_file(source_url)

    # A bare file name has no directory component, so there is nothing to create.
    if target_dir := os.path.dirname(local_path):
        os.makedirs(target_dir, exist_ok=True)

    with open(local_path, "wb") as destination:
        destination.write(payload.getvalue())

    self.logger.debug(f"Downloaded SharePoint file to: {local_path}")
    return True
def create_temp_file(self, content: BytesIO, path: str):
# Ensure the path is under /tmp/
new_path = os.path.join("/tmp/sharepoint", path)

View file

@ -278,6 +278,17 @@ class SharePointClient:
# logger.debug(f"Listing folder contents from URL: {url}")
return "GET", url, None
@api_call_decorator
def get_file_metadata(self, file_path: str) -> Dict[str, Any]:
    """
    GET /drives/{drive-id}/root:/{file_path}

    Builds the request for a drive item addressed by its path. The method
    body only returns the ``(method, url, payload)`` triple; the request is
    presumably executed by ``api_call_decorator`` (not visible here), which
    is expected to return the file metadata, including
    '@microsoft.graph.downloadUrl', and to raise ValueError if the file does
    not exist (404).

    ``file_path`` may be given with or without leading slashes.
    """
    # The template already supplies the slash after "root:", so a path such
    # as "/Projects/x.xlsx" would otherwise produce "root://Projects/x.xlsx".
    relative_path = file_path.lstrip("/")
    url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{relative_path}"
    return "GET", url, None
@api_call_decorator
def create_folder(self, file_name: str, folder_path: str) -> Dict[str, Any]:
"""
@ -325,7 +336,7 @@ class SharePointClient:
return self.upload_file(file_name, sharepoint_parent_id, file_stream)
@staticmethod
def download_sharepoint_file(download_url):
def download_sharepoint_file(download_url: str) -> BytesIO:
"""
Downloads a file from the given URL and returns its content.