mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge branch 'main' into caveman-claude
This commit is contained in:
commit
638471a5d6
12 changed files with 835 additions and 11 deletions
|
|
@ -16,6 +16,7 @@ class FileTypeEnum(enum.Enum):
|
|||
PAS_2023_OCCUPANCY = "pas_2023_occupancy"
|
||||
ECMK_SITE_NOTE = "ecmk_site_note"
|
||||
ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
|
||||
ECMK_SURVEY_XML = "ecmk_survey_xml"
|
||||
|
||||
|
||||
class FileSourceEnum(enum.Enum):
|
||||
|
|
|
|||
53
backend/ecmk_fetcher/excel_writer.py
Normal file
53
backend/ecmk_fetcher/excel_writer.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import os
|
||||
from typing import Any
|
||||
|
||||
from openpyxl import Workbook, load_workbook
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
|
||||
|
||||
def write_row(file_path: str, row_data: dict[str, Any]) -> None:
    """Append ``row_data`` as one row of the workbook stored at ``file_path``.

    If the file does not exist, a new workbook is created whose first row
    holds the keys of ``row_data`` (in dict order) and whose second row holds
    the values.

    If the file exists, its active sheet is extended: every key that is not
    yet a header gets a new column, inserted in front of the next key of
    ``row_data`` that already is a header (so related columns stay grouped),
    or added after the last header when no such key exists. The values are
    then appended as a new row, aligned to the header columns. The workbook
    is saved back to ``file_path`` in both cases.

    Args:
        file_path: Location of the ``.xlsx`` file to create or extend.
        row_data: Mapping of column header to cell value for the new row.
    """
    new_keys = list(row_data)

    if not os.path.exists(file_path):
        wb = Workbook()
        ws: Worksheet = wb.active  # type: ignore[assignment]
        ws.append(new_keys)
        ws.append(list(row_data.values()))
        wb.save(file_path)
        return

    wb = load_workbook(file_path)
    ws = wb.active  # type: ignore[assignment]

    # Read the header row positionally, starting at column A, so that
    # headers[i] always describes sheet column i + 1. Blank header cells in
    # the middle are kept as None placeholders: dropping them would shift
    # every later index and misplace both inserted columns and the new row.
    first_row = next(
        ws.iter_rows(min_row=1, max_row=1, min_col=1, values_only=True), ()
    )
    headers: list[Any] = list(first_row)
    # Trailing blanks carry no positional information; removing them lets
    # brand-new columns start directly after the last real header.
    while headers and headers[-1] is None:
        headers.pop()

    for position, key in enumerate(new_keys):
        if key in headers:
            continue

        # Find the first key that comes after this one in new_keys and already
        # exists in headers — insert before it to keep columns logically grouped.
        insert_before = next(
            (later for later in new_keys[position + 1:] if later in headers),
            None,
        )

        if insert_before is not None:
            col_idx = headers.index(insert_before) + 1  # 1-based
            # insert_cols shifts the existing cells of every row to the right,
            # so earlier rows simply get an empty cell in the new column.
            ws.insert_cols(col_idx)
            ws.cell(row=1, column=col_idx, value=key)
            headers.insert(col_idx - 1, key)
        else:
            headers.append(key)
            ws.cell(row=1, column=len(headers), value=key)

    # Columns without a header (None placeholders) stay empty in the new row.
    ws.append([None if col is None else row_data.get(col) for col in headers])
    wb.save(file_path)
|
||||
|
|
@ -26,13 +26,17 @@ from backend.ecmk_fetcher.browser import (
|
|||
)
|
||||
from backend.ecmk_fetcher.reports import (
|
||||
REPORT_TYPES,
|
||||
FileDownloadButtonType,
|
||||
build_property_id,
|
||||
map_report_type_to_db_file_type,
|
||||
)
|
||||
from backend.ecmk_fetcher.excel_writer import write_row
|
||||
from backend.ecmk_fetcher.upload import (
|
||||
upload_excel_to_sharepoint,
|
||||
upload_file_to_s3_and_update_db,
|
||||
upload_file_to_sharepoint,
|
||||
)
|
||||
from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
|
||||
from utils.logger import setup_logger
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
from utils.sharepoint.domna_sites import DomnaSites
|
||||
|
|
@ -59,6 +63,15 @@ def run_job() -> None:
|
|||
)
|
||||
|
||||
sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
|
||||
sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling"
|
||||
|
||||
DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
|
||||
local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME)
|
||||
|
||||
sharepoint_client.download_file(
|
||||
sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
|
||||
local_path=local_dimensions_path,
|
||||
)
|
||||
|
||||
s3_bucket: str = "retrofit-energy-assessments-dev"
|
||||
|
||||
|
|
@ -141,15 +154,30 @@ def run_job() -> None:
|
|||
)
|
||||
|
||||
try:
|
||||
upload_file_to_sharepoint(
|
||||
client=sharepoint_client,
|
||||
file_path=file_path,
|
||||
base_path=sharepoint_base_path,
|
||||
subpath=sharepoint_address,
|
||||
)
|
||||
logger.info(
|
||||
f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
|
||||
)
|
||||
if report_type == FileDownloadButtonType.RAW_XML.value:
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
xml_string = f.read()
|
||||
details = parse_rdsap(xml_string)
|
||||
row_data = flatten_sap_property(details)
|
||||
write_row(local_dimensions_path, row_data)
|
||||
upload_excel_to_sharepoint(
|
||||
client=sharepoint_client,
|
||||
file_path=local_dimensions_path,
|
||||
sharepoint_path=sharepoint_excel_path,
|
||||
)
|
||||
logger.info(
|
||||
f"Written dimensions row and uploaded Dimensions.xlsx for {address}"
|
||||
)
|
||||
else:
|
||||
upload_file_to_sharepoint(
|
||||
client=sharepoint_client,
|
||||
file_path=file_path,
|
||||
base_path=sharepoint_base_path,
|
||||
subpath=sharepoint_address,
|
||||
)
|
||||
logger.info(
|
||||
f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
|
||||
)
|
||||
|
||||
# Upload to s3 and update db
|
||||
upload_file_to_s3_and_update_db(
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ class FileDownloadButtonType(Enum):
|
|||
# Raw values of the download buttons listed here: both site-note reports and
# the raw survey XML.
REPORT_TYPES = [
    button.value
    for button in (
        FileDownloadButtonType.ASSESSOR_HUB_SITENOTE_REPORT,
        FileDownloadButtonType.SITENOTE_REPORT,
        FileDownloadButtonType.RAW_XML,
    )
]
|
||||
|
||||
|
||||
|
|
@ -23,6 +24,8 @@ def map_report_type_to_db_file_type(report_type: int) -> FileTypeEnum:
|
|||
return FileTypeEnum.ECMK_SITE_NOTE
|
||||
case FileDownloadButtonType.SITENOTE_REPORT.value:
|
||||
return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE
|
||||
case FileDownloadButtonType.RAW_XML.value:
|
||||
return FileTypeEnum.ECMK_SURVEY_XML
|
||||
case _:
|
||||
raise ValueError("Unknown report type")
|
||||
|
||||
|
|
|
|||
123
backend/ecmk_fetcher/tests/test_excel_writer.py
Normal file
123
backend/ecmk_fetcher/tests/test_excel_writer.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
import os
|
||||
import pathlib
|
||||
import pytest
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
|
||||
from backend.ecmk_fetcher.excel_writer import write_row
|
||||
|
||||
|
||||
@pytest.fixture
def xlsx_path(tmp_path: pathlib.Path) -> str:
    """Return the path of a not-yet-created ``output.xlsx`` inside ``tmp_path``."""
    workbook_file = tmp_path / "output.xlsx"
    return str(workbook_file)
|
||||
|
||||
|
||||
def _active_sheet(file_path: str) -> Worksheet:
    """Load the workbook at ``file_path`` and return its active worksheet."""
    workbook = load_workbook(file_path)
    sheet = workbook.active
    # Narrow the type for the checker: ``active`` is not typed as a plain Worksheet.
    assert isinstance(sheet, Worksheet)
    return sheet
|
||||
|
||||
|
||||
def test_write_row_creates_file(xlsx_path: str):
    """A missing workbook is created with a header row followed by the data row."""
    # arrange
    row = {
        "address": "1 Fake Avenue, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
    }

    # act
    write_row(xlsx_path, row)

    # assert
    assert os.path.exists(xlsx_path)
    sheet = _active_sheet(xlsx_path)
    header_values = [cell.value for cell in sheet[1]]
    data_values = [cell.value for cell in sheet[2]]
    assert header_values == list(row)
    assert data_values == list(row.values())
|
||||
|
||||
|
||||
def test_write_row_appends_to_existing(xlsx_path: str):
    """A second row with the same keys lands below the first; the header is unchanged."""
    # arrange
    first = {
        "address": "1 Fake Avenue, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
    }
    second = {
        "address": "2 Other Street, XY1 2AB",
        "property_type": "Flat",
        "main_dwelling_floor_1_area_m2": 30.0,
    }

    # act
    for row in (first, second):
        write_row(xlsx_path, row)

    # assert
    sheet = _active_sheet(xlsx_path)
    # Check the row count before indexing rows, as in the original ordering.
    assert sheet.max_row == 3  # 1 header + 2 data rows

    def values_in(row_number: int) -> list:
        return [cell.value for cell in sheet[row_number]]

    assert values_in(1) == list(first)
    assert values_in(2) == list(first.values())
    assert values_in(3) == list(second.values())
|
||||
|
||||
|
||||
def test_write_row_inserts_new_columns_at_logical_positions(xlsx_path: str):
    """New keys become columns next to their neighbours instead of at the end.

    The first row has main_dwelling floor 1 + roof. The second row also has
    main_dwelling floor 2, whose columns must be inserted between floor 1 and
    roof rather than appended after roof.
    """
    # arrange
    without_floor_2 = {
        "address": "1 Fake Avenue, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
        "main_dwelling_floor_1_height_m": 2.46,
        "main_dwelling_roof_construction": 4,
    }
    with_floor_2 = {
        "address": "2 Other Street, XY1 2AB",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 50.0,
        "main_dwelling_floor_1_height_m": 2.5,
        "main_dwelling_floor_2_area_m2": 48.0,
        "main_dwelling_floor_2_height_m": 2.4,
        "main_dwelling_roof_construction": 4,
    }

    # act
    write_row(xlsx_path, without_floor_2)
    write_row(xlsx_path, with_floor_2)

    # assert
    sheet = _active_sheet(xlsx_path)
    header, first_data, second_data = (
        [cell.value for cell in sheet[row_number]] for row_number in (1, 2, 3)
    )

    expected_header = [
        "address",
        "property_type",
        "main_dwelling_floor_1_area_m2",
        "main_dwelling_floor_1_height_m",
        "main_dwelling_floor_2_area_m2",  # inserted before roof, not at end
        "main_dwelling_floor_2_height_m",
        "main_dwelling_roof_construction",
    ]
    assert header == expected_header

    # The first row had no floor_2 data — those cells should be empty.
    expected_first = [
        "1 Fake Avenue, AB24 5CD",
        "House",
        43.61,
        2.46,
        None,  # main_dwelling_floor_2_area_m2
        None,  # main_dwelling_floor_2_height_m
        4,
    ]
    assert first_data == expected_first

    # The second row should be fully populated.
    expected_second = [
        "2 Other Street, XY1 2AB",
        "House",
        50.0,
        2.5,
        48.0,
        2.4,
        4,
    ]
    assert second_data == expected_second
|
||||
329
backend/ecmk_fetcher/tests/test_xml_processor.py
Normal file
329
backend/ecmk_fetcher/tests/test_xml_processor.py
Normal file
|
|
@ -0,0 +1,329 @@
|
|||
from backend.ecmk_fetcher.xml_processor import (
|
||||
SapPropertyDetails,
|
||||
flatten_sap_property,
|
||||
parse_rdsap,
|
||||
)
|
||||
|
||||
|
||||
SAMPLE_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
|
||||
<Report-Header>
|
||||
<Property>
|
||||
<Address>
|
||||
<Address-Line-1>1</Address-Line-1>
|
||||
<Address-Line-2>Fake Avenue</Address-Line-2>
|
||||
<Post-Town>Random</Post-Town>
|
||||
<Postcode>AB24 5CD</Postcode>
|
||||
</Address>
|
||||
</Property>
|
||||
</Report-Header>
|
||||
|
||||
<SAP-Data>
|
||||
<SAP-Property-Details>
|
||||
<Property-Type>0</Property-Type>
|
||||
|
||||
<SAP-Building-Parts>
|
||||
|
||||
<SAP-Building-Part>
|
||||
<Building-Part-Number>1</Building-Part-Number>
|
||||
<Identifier>Main Dwelling</Identifier>
|
||||
<Construction-Age-Band>C</Construction-Age-Band>
|
||||
<Floor-Heat-Loss>7</Floor-Heat-Loss>
|
||||
|
||||
<Roof-Construction>4</Roof-Construction>
|
||||
<Roof-Insulation-Location>2</Roof-Insulation-Location>
|
||||
<Roof-Insulation-Thickness>100mm</Roof-Insulation-Thickness>
|
||||
|
||||
<Wall-Construction>4</Wall-Construction>
|
||||
<Wall-Insulation-Type>4</Wall-Insulation-Type>
|
||||
|
||||
<SAP-Floor-Dimensions>
|
||||
<SAP-Floor-Dimension>
|
||||
<Heat-Loss-Perimeter quantity="metres">25.31</Heat-Loss-Perimeter>
|
||||
<Room-Height quantity="metres">2.46</Room-Height>
|
||||
<Total-Floor-Area quantity="square metres">43.61</Total-Floor-Area>
|
||||
<Floor>0</Floor>
|
||||
<Party-Wall-Length>0</Party-Wall-Length>
|
||||
</SAP-Floor-Dimension>
|
||||
|
||||
<SAP-Floor-Dimension>
|
||||
<Heat-Loss-Perimeter quantity="metres">26.16</Heat-Loss-Perimeter>
|
||||
<Room-Height quantity="metres">2.44</Room-Height>
|
||||
<Total-Floor-Area quantity="square metres">42.33</Total-Floor-Area>
|
||||
<Floor>1</Floor>
|
||||
<Party-Wall-Length>0</Party-Wall-Length>
|
||||
</SAP-Floor-Dimension>
|
||||
</SAP-Floor-Dimensions>
|
||||
|
||||
</SAP-Building-Part>
|
||||
|
||||
<SAP-Building-Part>
|
||||
<Building-Part-Number>2</Building-Part-Number>
|
||||
<Identifier>Extension</Identifier>
|
||||
<Construction-Age-Band>C</Construction-Age-Band>
|
||||
|
||||
<Roof-Construction>8</Roof-Construction>
|
||||
<Roof-Insulation-Location>7</Roof-Insulation-Location>
|
||||
<Sloping-Ceiling-Insulation-Thickness>AB</Sloping-Ceiling-Insulation-Thickness>
|
||||
|
||||
<Wall-Construction>3</Wall-Construction>
|
||||
<Wall-Insulation-Type>4</Wall-Insulation-Type>
|
||||
|
||||
<SAP-Floor-Dimensions>
|
||||
<SAP-Floor-Dimension>
|
||||
<Heat-Loss-Perimeter quantity="metres">6.85</Heat-Loss-Perimeter>
|
||||
<Room-Height quantity="metres">2.24</Room-Height>
|
||||
<Total-Floor-Area quantity="square metres">4.46</Total-Floor-Area>
|
||||
<Floor>0</Floor>
|
||||
<Party-Wall-Length>0</Party-Wall-Length>
|
||||
</SAP-Floor-Dimension>
|
||||
</SAP-Floor-Dimensions>
|
||||
|
||||
</SAP-Building-Part>
|
||||
|
||||
</SAP-Building-Parts>
|
||||
|
||||
</SAP-Property-Details>
|
||||
</SAP-Data>
|
||||
</RdSAP-Report>
|
||||
"""
|
||||
|
||||
|
||||
NO_ROOF_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
|
||||
<Report-Header>
|
||||
<Property>
|
||||
<Address>
|
||||
<Address-Line-1>5</Address-Line-1>
|
||||
<Post-Town>Somewhere</Post-Town>
|
||||
<Postcode>XY1 2AB</Postcode>
|
||||
</Address>
|
||||
</Property>
|
||||
</Report-Header>
|
||||
<SAP-Data>
|
||||
<SAP-Property-Details>
|
||||
<Property-Type>0</Property-Type>
|
||||
<SAP-Building-Parts>
|
||||
<SAP-Building-Part>
|
||||
<Identifier>Main Dwelling</Identifier>
|
||||
<SAP-Floor-Dimensions>
|
||||
<SAP-Floor-Dimension>
|
||||
<Heat-Loss-Perimeter quantity="metres">10.0</Heat-Loss-Perimeter>
|
||||
<Room-Height quantity="metres">2.5</Room-Height>
|
||||
<Total-Floor-Area quantity="square metres">50.0</Total-Floor-Area>
|
||||
<Floor>0</Floor>
|
||||
<Party-Wall-Length>3.0</Party-Wall-Length>
|
||||
</SAP-Floor-Dimension>
|
||||
</SAP-Floor-Dimensions>
|
||||
</SAP-Building-Part>
|
||||
</SAP-Building-Parts>
|
||||
</SAP-Property-Details>
|
||||
</SAP-Data>
|
||||
</RdSAP-Report>
|
||||
"""
|
||||
|
||||
|
||||
def test_parse_rdsap_contract():
    """parse_rdsap turns SAMPLE_XML into the full nested property structure."""
    # arrange
    main_dwelling = {
        "identifier": "Main Dwelling",
        "floors": [
            {
                "area_m2": 43.61,
                "height_m": 2.46,
                "heat_loss_perimeter_m": 25.31,
                "party_wall_length_m": 0.0,
            },
            {
                "area_m2": 42.33,
                "height_m": 2.44,
                "heat_loss_perimeter_m": 26.16,
                "party_wall_length_m": 0.0,
            },
        ],
        "roof": {
            "construction": 4,
            "insulation_location": 2,
            "insulation_thickness_mm": 100.0,
        },
    }
    # The expected extension roof carries no thickness entry.
    extension = {
        "identifier": "Extension",
        "floors": [
            {
                "area_m2": 4.46,
                "height_m": 2.24,
                "heat_loss_perimeter_m": 6.85,
                "party_wall_length_m": 0.0,
            }
        ],
        "roof": {
            "construction": 8,
            "insulation_location": 7,
        },
    }
    expected = {
        "reference": "1AB245CD",
        "address": "1, Fake Avenue, Random, AB24 5CD",
        "property_type": "House",
        "building_parts": [main_dwelling, extension],
    }

    # act
    parsed: SapPropertyDetails = parse_rdsap(SAMPLE_XML)

    # assert
    assert parsed == expected
|
||||
|
||||
|
||||
ND_THICKNESS_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
|
||||
<Report-Header>
|
||||
<Property>
|
||||
<Address>
|
||||
<Address-Line-1>1</Address-Line-1>
|
||||
<Post-Town>Somewhere</Post-Town>
|
||||
<Postcode>AB1 2CD</Postcode>
|
||||
</Address>
|
||||
</Property>
|
||||
</Report-Header>
|
||||
<SAP-Data>
|
||||
<SAP-Property-Details>
|
||||
<Property-Type>0</Property-Type>
|
||||
<SAP-Building-Parts>
|
||||
<SAP-Building-Part>
|
||||
<Identifier>Main Dwelling</Identifier>
|
||||
<Roof-Construction>4</Roof-Construction>
|
||||
<Roof-Insulation-Location>2</Roof-Insulation-Location>
|
||||
<Roof-Insulation-Thickness>ND</Roof-Insulation-Thickness>
|
||||
<SAP-Floor-Dimensions>
|
||||
<SAP-Floor-Dimension>
|
||||
<Heat-Loss-Perimeter quantity="metres">10.0</Heat-Loss-Perimeter>
|
||||
<Room-Height quantity="metres">2.5</Room-Height>
|
||||
<Total-Floor-Area quantity="square metres">50.0</Total-Floor-Area>
|
||||
<Floor>0</Floor>
|
||||
<Party-Wall-Length>0</Party-Wall-Length>
|
||||
</SAP-Floor-Dimension>
|
||||
</SAP-Floor-Dimensions>
|
||||
</SAP-Building-Part>
|
||||
</SAP-Building-Parts>
|
||||
</SAP-Property-Details>
|
||||
</SAP-Data>
|
||||
</RdSAP-Report>
|
||||
"""
|
||||
|
||||
ND_INSULATION_LOCATION_XML = """<RdSAP-Report xmlns="https://epbr.digital.communities.gov.uk/xsd/rdsap">
|
||||
<Report-Header>
|
||||
<Property>
|
||||
<Address>
|
||||
<Address-Line-1>1</Address-Line-1>
|
||||
<Post-Town>Somewhere</Post-Town>
|
||||
<Postcode>AB1 2CD</Postcode>
|
||||
</Address>
|
||||
</Property>
|
||||
</Report-Header>
|
||||
<SAP-Data>
|
||||
<SAP-Property-Details>
|
||||
<Property-Type>0</Property-Type>
|
||||
<SAP-Building-Parts>
|
||||
<SAP-Building-Part>
|
||||
<Identifier>Main Dwelling</Identifier>
|
||||
<Roof-Construction>4</Roof-Construction>
|
||||
<Roof-Insulation-Location>ND</Roof-Insulation-Location>
|
||||
<Roof-Insulation-Thickness>250</Roof-Insulation-Thickness>
|
||||
<SAP-Floor-Dimensions>
|
||||
<SAP-Floor-Dimension>
|
||||
<Heat-Loss-Perimeter quantity="metres">10.0</Heat-Loss-Perimeter>
|
||||
<Room-Height quantity="metres">2.5</Room-Height>
|
||||
<Total-Floor-Area quantity="square metres">50.0</Total-Floor-Area>
|
||||
<Floor>0</Floor>
|
||||
<Party-Wall-Length>0</Party-Wall-Length>
|
||||
</SAP-Floor-Dimension>
|
||||
</SAP-Floor-Dimensions>
|
||||
</SAP-Building-Part>
|
||||
</SAP-Building-Parts>
|
||||
</SAP-Property-Details>
|
||||
</SAP-Data>
|
||||
</RdSAP-Report>
|
||||
"""
|
||||
|
||||
|
||||
def test_parse_rdsap_nd_thickness():
    """'ND' (not determined) occurs in the wild for Roof-Insulation-Thickness.

    It should be retained as-is rather than raising.
    """
    # arrange
    expected_roof = {
        "construction": 4,
        "insulation_location": 2,
        "insulation_thickness_mm": "ND",
    }

    # act
    parsed: SapPropertyDetails = parse_rdsap(ND_THICKNESS_XML)

    # assert
    first_part = parsed["building_parts"][0]
    assert first_part["roof"] == expected_roof
|
||||
|
||||
|
||||
def test_parse_rdsap_nd_location():
    """'ND' (not determined) occurs in the wild for Roof-Insulation-Location.

    It should be retained as-is rather than raising.
    """
    # arrange
    expected_roof = {
        "construction": 4,
        "insulation_location": "ND",
        "insulation_thickness_mm": 250,
    }

    # act
    parsed: SapPropertyDetails = parse_rdsap(ND_INSULATION_LOCATION_XML)

    # assert
    first_part = parsed["building_parts"][0]
    assert first_part["roof"] == expected_roof
|
||||
|
||||
|
||||
def test_flatten_full():
    """Flatten two building parts into a single row of prefixed columns.

    Main Dwelling has two floors + full roof; Extension has one floor +
    partial roof (no thickness).
    """
    # arrange
    details: SapPropertyDetails = parse_rdsap(SAMPLE_XML)
    expected_row = {
        "reference": "1AB245CD",
        "address": "1, Fake Avenue, Random, AB24 5CD",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 43.61,
        "main_dwelling_floor_1_height_m": 2.46,
        "main_dwelling_floor_1_heat_loss_perimeter_m": 25.31,
        "main_dwelling_floor_1_party_wall_length_m": 0.0,
        "main_dwelling_floor_2_area_m2": 42.33,
        "main_dwelling_floor_2_height_m": 2.44,
        "main_dwelling_floor_2_heat_loss_perimeter_m": 26.16,
        "main_dwelling_floor_2_party_wall_length_m": 0.0,
        "main_dwelling_roof_construction": 4,
        "main_dwelling_roof_insulation_location": 2,
        "main_dwelling_roof_insulation_thickness_mm": 100.0,
        "extension_floor_1_area_m2": 4.46,
        "extension_floor_1_height_m": 2.24,
        "extension_floor_1_heat_loss_perimeter_m": 6.85,
        "extension_floor_1_party_wall_length_m": 0.0,
        "extension_roof_construction": 8,
        "extension_roof_insulation_location": 7,
    }

    # act
    flattened = flatten_sap_property(details)

    # assert
    assert flattened == expected_row
|
||||
|
||||
|
||||
def test_flatten_no_roof():
    """Single building part with no roof — roof keys must be absent entirely."""
    # arrange
    details: SapPropertyDetails = parse_rdsap(NO_ROOF_XML)
    expected_row = {
        "reference": "5XY12AB",
        "address": "5, Somewhere, XY1 2AB",
        "property_type": "House",
        "main_dwelling_floor_1_area_m2": 50.0,
        "main_dwelling_floor_1_height_m": 2.5,
        "main_dwelling_floor_1_heat_loss_perimeter_m": 10.0,
        "main_dwelling_floor_1_party_wall_length_m": 3.0,
    }

    # act
    flattened = flatten_sap_property(details)

    # assert
    assert flattened == expected_row
|
||||
|
|
@ -28,6 +28,18 @@ def upload_file_to_sharepoint(
|
|||
)
|
||||
|
||||
|
||||
def upload_excel_to_sharepoint(
    client: DomnaSharepointClient,
    file_path: str,
    sharepoint_path: str,
) -> None:
    """Upload the local file at ``file_path`` into the ``sharepoint_path`` folder.

    The file keeps its local base name on SharePoint. The transfer itself is
    delegated to ``client.upload_file``.
    """
    target_name = os.path.basename(file_path)
    client.upload_file(
        file_path=file_path,
        sharepoint_path=sharepoint_path,
        file_name=target_name,
    )
|
||||
|
||||
|
||||
def upload_file_to_s3_and_update_db(
|
||||
bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum
|
||||
) -> None:
|
||||
|
|
|
|||
226
backend/ecmk_fetcher/xml_processor.py
Normal file
226
backend/ecmk_fetcher/xml_processor.py
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
import xml.etree.ElementTree as ET
|
||||
from typing import Any, List, Optional, TypedDict
|
||||
|
||||
|
||||
from backend.ecmk_fetcher.reports import build_property_id
|
||||
from datatypes.epc.domain.field_mappings import PROPERTY_TYPE_LOOKUP
|
||||
|
||||
|
||||
# This file should ultimately live somewhere different, probably
|
||||
class Floor(TypedDict):
    """Measurements of one floor of a building part; all keys are required."""

    # Filled by parse_rdsap from Total-Floor-Area.
    area_m2: float
    # Filled by parse_rdsap from Room-Height.
    height_m: float
    # Filled by parse_rdsap from Heat-Loss-Perimeter.
    heat_loss_perimeter_m: float
    # Filled by parse_rdsap from Party-Wall-Length.
    party_wall_length_m: float
|
||||
|
||||
|
||||
class Roof(TypedDict, total=False):
|
||||
construction: int # TODO: map to str
|
||||
insulation_location: int | str # TODO: map to str
|
||||
insulation_thickness_mm: float | str
|
||||
|
||||
|
||||
class BuildingPart(TypedDict):
    """One SAP building part: its label, its floors and (if any) its roof."""

    # Label taken from the XML, e.g. "Main Dwelling", "Extension".
    identifier: str
    # One entry per SAP-Floor-Dimension element, in the order parse_rdsap finds them.
    floors: List[Floor]
    # None when the XML carries no roof element for this part.
    roof: Optional[Roof]
|
||||
|
||||
|
||||
class SapPropertyDetails(TypedDict):
    """Everything parse_rdsap extracts from one RdSAP report."""

    # Property id built by build_property_id from address line 1 and postcode.
    reference: str
    # Non-empty address fields joined with ", ".
    address: str
    # Human-readable label looked up in PROPERTY_TYPE_LOOKUP.
    property_type: str
    # One entry per SAP-Building-Part element.
    building_parts: List[BuildingPart]
|
||||
|
||||
|
||||
def _get_namespace(tag: str) -> str:
|
||||
return tag.split("}")[0].strip("{")
|
||||
|
||||
|
||||
def _require_text(value: Optional[str], field: str) -> str:
|
||||
if value is None:
|
||||
raise ValueError(f"Missing required field: {field}")
|
||||
return value
|
||||
|
||||
|
||||
def _parse_float(value: Optional[str], field: str) -> float:
|
||||
if value is None:
|
||||
raise ValueError(f"Missing float field: {field}")
|
||||
return float(value)
|
||||
|
||||
|
||||
def _parse_int(value: Optional[str], field: str) -> int:
|
||||
if value is None:
|
||||
raise ValueError(f"Missing int field: {field}")
|
||||
return int(value)
|
||||
|
||||
|
||||
def _parse_thickness_mm(value: Optional[str]) -> Optional[float | str]:
|
||||
if value is None:
|
||||
return None
|
||||
stripped = value.replace("mm", "").strip()
|
||||
try:
|
||||
return float(stripped)
|
||||
except ValueError:
|
||||
return stripped
|
||||
|
||||
|
||||
def parse_rdsap(xml_string: str) -> SapPropertyDetails:
    """Parse an RdSAP report into a ``SapPropertyDetails`` dictionary.

    The namespace is taken from the root tag and bound to the prefix ``r``
    for every lookup.

    Args:
        xml_string: The complete XML document as text.

    Returns:
        Reference, joined address, property-type label and one entry per
        ``SAP-Building-Part`` (identifier, floors, optional roof).

    Raises:
        ValueError: If the Address element, Property-Type, a part's
            Identifier or a floor measurement is missing, or if a numeric
            field other than Roof-Insulation-Location / Roof-Insulation-Thickness
            cannot be converted.
        KeyError: If the property-type code is not in ``PROPERTY_TYPE_LOOKUP``.
    """
    document = ET.fromstring(xml_string)
    ns: dict[str, str] = {"r": _get_namespace(document.tag)}

    # --- Address ---
    address_node = document.find(".//r:Address", ns)
    if address_node is None:
        raise ValueError("Address element not found")

    # Absent address fields default to "" so they can be filtered out below.
    line_1, line_2, town, postcode = (
        address_node.findtext(path, default="", namespaces=ns)
        for path in (
            "r:Address-Line-1",
            "r:Address-Line-2",
            "r:Post-Town",
            "r:Postcode",
        )
    )
    address: str = ", ".join(
        piece for piece in (line_1, line_2, town, postcode) if piece
    )
    reference: str = build_property_id(line_1, postcode)

    # --- Property Type ---
    type_code: int = _parse_int(
        document.findtext(".//r:Property-Type", namespaces=ns), "Property-Type"
    )
    property_type: str = PROPERTY_TYPE_LOOKUP[type_code]

    # --- Building Parts ---
    parts: List[BuildingPart] = []
    for part_node in document.findall(".//r:SAP-Building-Part", ns):
        identifier: str = _require_text(
            part_node.findtext("r:Identifier", namespaces=ns), "Identifier"
        )

        # Floors: one dict per SAP-Floor-Dimension, every measurement required.
        floors: List[Floor] = [
            {
                "area_m2": _parse_float(
                    dimension.findtext("r:Total-Floor-Area", namespaces=ns),
                    "Total-Floor-Area",
                ),
                "height_m": _parse_float(
                    dimension.findtext("r:Room-Height", namespaces=ns),
                    "Room-Height",
                ),
                "heat_loss_perimeter_m": _parse_float(
                    dimension.findtext("r:Heat-Loss-Perimeter", namespaces=ns),
                    "Heat-Loss-Perimeter",
                ),
                "party_wall_length_m": _parse_float(
                    dimension.findtext("r:Party-Wall-Length", namespaces=ns),
                    "Party-Wall-Length",
                ),
            }
            for dimension in part_node.findall(".//r:SAP-Floor-Dimension", ns)
        ]

        # Roof (optional): stays None unless at least one roof element exists.
        construction_text = part_node.findtext("r:Roof-Construction", namespaces=ns)
        location_text = part_node.findtext("r:Roof-Insulation-Location", namespaces=ns)
        thickness_text = part_node.findtext("r:Roof-Insulation-Thickness", namespaces=ns)

        roof: Optional[Roof] = None
        no_roof_data = (
            construction_text is None
            and location_text is None
            and thickness_text is None
        )
        if not no_roof_data:
            roof = {}

            if construction_text is not None:
                roof["construction"] = _parse_int(
                    construction_text, "Roof-Construction"
                )

            if location_text is not None:
                # Non-integer markers such as "ND" are kept as raw text.
                try:
                    roof["insulation_location"] = _parse_int(
                        location_text, "Roof-Insulation-Location"
                    )
                except ValueError:
                    roof["insulation_location"] = location_text

            thickness = _parse_thickness_mm(thickness_text)
            if thickness is not None:
                roof["insulation_thickness_mm"] = thickness

        parts.append(
            {
                "identifier": identifier,
                "floors": floors,
                "roof": roof,
            }
        )

    return {
        "reference": reference,
        "address": address,
        "property_type": property_type,
        "building_parts": parts,
    }
|
||||
|
||||
|
||||
def _normalise_identifier(identifier: str) -> str:
|
||||
return identifier.lower().replace(" ", "_").replace("-", "_")
|
||||
|
||||
|
||||
def flatten_sap_property(details: SapPropertyDetails) -> dict[str, Any]:
    """Flatten nested property details into one flat mapping of column → value.

    Key order is: reference, address, property_type, then for each building
    part its floors (numbered from 1) followed by whichever roof keys are
    present. Part keys are prefixed with the normalised part identifier.
    A part whose roof is None or empty contributes no roof keys.
    """
    flat: dict[str, Any] = {
        "reference": details["reference"],
        "address": details["address"],
        "property_type": details["property_type"],
    }

    for part in details["building_parts"]:
        prefix = _normalise_identifier(part["identifier"])

        for number, floor in enumerate(part["floors"], start=1):
            floor_prefix = f"{prefix}_floor_{number}"
            flat.update(
                {
                    f"{floor_prefix}_area_m2": floor["area_m2"],
                    f"{floor_prefix}_height_m": floor["height_m"],
                    f"{floor_prefix}_heat_loss_perimeter_m": floor[
                        "heat_loss_perimeter_m"
                    ],
                    f"{floor_prefix}_party_wall_length_m": floor[
                        "party_wall_length_m"
                    ],
                }
            )

        roof = part.get("roof")
        if not roof:
            continue

        # Roof is a partial dict: copy only the keys that were actually set.
        if "construction" in roof:
            flat[f"{prefix}_roof_construction"] = roof["construction"]
        if "insulation_location" in roof:
            flat[f"{prefix}_roof_insulation_location"] = roof["insulation_location"]
        if "insulation_thickness_mm" in roof:
            flat[f"{prefix}_roof_insulation_thickness_mm"] = roof[
                "insulation_thickness_mm"
            ]

    return flat
|
||||
3
datatypes/epc/domain/field_mappings.py
Normal file
3
datatypes/epc/domain/field_mappings.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# Property-Type code from the XML → label used in the parsed output.
PROPERTY_TYPE_LOOKUP = {
    0: "House",
    1: "Bungalow",
    2: "Flat",
    3: "Maisonette",
}
# Empty placeholders — presumably to be filled once the roof codes are mapped
# to labels.
ROOF_CONSTRUCTION_LOOKUP = {}
ROOF_INSULATION_LOCATION_LOOKUP = {}
|
||||
|
|
@ -3,6 +3,6 @@ pythonpath = .
|
|||
log_cli = true
|
||||
log_cli_level = INFO
|
||||
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/
|
||||
markers =
|
||||
integration: mark a test as an integration test
|
||||
|
|
|
|||
|
|
@ -90,6 +90,41 @@ class DomnaSharepointClient:
|
|||
file_name, get_file_stream(file_path), sharepoint_path
|
||||
)
|
||||
|
||||
def download_file(self, sharepoint_path: str, local_path: str) -> bool:
    """
    Copy a SharePoint file to ``local_path``.

    Returns True once the file has been written. Returns False when the
    metadata lookup raises ValueError (treated as "file does not exist yet")
    or when the metadata carries no download URL. Other errors propagate.
    """
    graph_client = SharePointClient(
        tenant_id=self.sharepoint_tenant_id,
        client_id=self.sharepoint_client_id,
        client_secret=self.sharepoint_client_secret,
        site_id=self.sharepoint_drive.value,
    )

    try:
        file_info: Dict[str, Any] = graph_client.get_file_metadata(sharepoint_path)
    except ValueError:
        return False

    url: Optional[str] = file_info.get("@microsoft.graph.downloadUrl")
    if not url:
        return False

    payload: BytesIO = SharePointClient.download_sharepoint_file(url)

    # Create the target folder first; a bare file name has no parent to create.
    if target_dir := os.path.dirname(local_path):
        os.makedirs(target_dir, exist_ok=True)

    with open(local_path, "wb") as local_file:
        local_file.write(payload.getvalue())

    self.logger.debug(f"Downloaded SharePoint file to: {local_path}")
    return True
|
||||
|
||||
def create_temp_file(self, content: BytesIO, path: str):
|
||||
# Ensure the path is under /tmp/
|
||||
new_path = os.path.join("/tmp/sharepoint", path)
|
||||
|
|
|
|||
|
|
@ -278,6 +278,17 @@ class SharePointClient:
|
|||
# logger.debug(f"Listing folder contents from URL: {url}")
|
||||
return "GET", url, None
|
||||
|
||||
@api_call_decorator
def get_file_metadata(self, file_path: str) -> Dict[str, Any]:
    """
    GET /drives/{drive-id}/root:/{file_path}

    Returns file metadata, including '@microsoft.graph.downloadUrl'.
    Raises ValueError if the file does not exist (404).

    ``file_path`` may be given with or without leading slashes; both forms
    address the same drive item.

    NOTE(review): this method only builds the request triple; sending it and
    the result/error handling described above are presumably done by
    ``api_call_decorator`` — confirm there.
    """
    # The URL template already supplies the "/" after "root:". Without this,
    # a path such as "/Projects/x.xlsx" would produce "root://Projects/x.xlsx".
    relative_path = file_path.lstrip("/")
    url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{relative_path}"
    return "GET", url, None
|
||||
|
||||
@api_call_decorator
|
||||
def create_folder(self, file_name: str, folder_path: str) -> Dict[str, Any]:
|
||||
"""
|
||||
|
|
@ -325,7 +336,7 @@ class SharePointClient:
|
|||
return self.upload_file(file_name, sharepoint_parent_id, file_stream)
|
||||
|
||||
@staticmethod
|
||||
def download_sharepoint_file(download_url):
|
||||
def download_sharepoint_file(download_url: str) -> BytesIO:
|
||||
"""
|
||||
Downloads a file from the given URL and returns its content.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue