parse lbwf houses 🟩

This commit is contained in:
Daniel Roth 2026-01-19 16:23:11 +00:00
parent ee54ca648e
commit 6ff652ff34
8 changed files with 128 additions and 10 deletions

View file

@ -27,5 +27,8 @@
"ms-python.vscode-python-envs"
]
}
},
"containerEnv": {
"PYTHONFLAGS": "-Xfrozen_modules=off"
}
}

15
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}

View file

@ -11,6 +11,7 @@
},
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
// Hot reload setting that needs to be in user settings
// "jupyter.runStartupCommands": [

View file

@ -1,9 +1,99 @@
from typing import BinaryIO, Any
from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
from openpyxl import Workbook, load_workbook
from datetime import date
from backend.condition.parsing.parser import Parser
from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition
from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition
from backend.condition.utils.date_utils import normalise_date
from utils.logger import setup_logger
# NOTE(review): this binds the `setup_logger` name itself instead of calling it.
# If `setup_logger` is a factory function (its definition is not visible here),
# `logger.error(...)` in LbwfParser.parse would raise AttributeError on the
# bad-date path. Presumably this should be `setup_logger(...)` — confirm the
# factory's signature in utils.logger before changing.
logger = setup_logger
class LbwfParser(Parser):
    """Parse an LBWF workbook into ``LbwfAssetCondition`` records.

    Asset rows are read from the "Houses Asset Data" sheet; the
    "All Energy Breakdown " sheet is read to build an address -> UPRN lookup.
    """

    # Sheet names exactly as they appear in the source workbook.
    _ASSETS_SHEET = "Houses Asset Data"
    _ENERGY_SHEET = "All Energy Breakdown "  # trailing space is intentional

    # LbwfAssetCondition field name -> column header in the assets sheet.
    # "INSTALL DATE" is handled separately because it needs normalising.
    _ASSET_COLUMNS: Dict[str, str] = {
        "prop_ref": "PROP REF",
        "domna": "Domna",
        "address": "ADDRESS",
        "ownership": "OWNERSHIP",
        "prop_status": "PROP STATUS",
        "prop_type": "PROP TYPE",
        "prop_sub_type": "PROP SUB TYPE",
        "element_group": "ELEMENT GROUP",
        "element_code": "ELEMENT CODE",
        "element_code_description": "ELEMENT CODE DESCRIPTION",
        "attribute_code": "ATTRIBUTE CODE",
        "attribute_code_description": "ATTRIBUTE CODE DESCRIPTION",
        "element_date_value": "ELEMENT DATE VALUE",
        "element_numerical_value": "ELEMENT NUMERIC VALUE",
        "element_text_value": "ELEMENT TEXT VALUE",
        "quantity": "QUANTITY",
        "remaining_life": "REMAINING LIFE",
        "element_comments": "ELEMENT COMMENTS",
    }

    def parse(self, file_stream: BinaryIO) -> Any:
        """Read the workbook from *file_stream* and return the parsed assets.

        Rows whose "INSTALL DATE" cell cannot be normalised are logged and
        skipped; every other row becomes one ``LbwfAssetCondition``.

        Returns:
            A list of ``LbwfAssetCondition`` (empty if the assets sheet has
            no header row).

        Raises:
            KeyError: if an expected sheet or column header is missing.
            ValueError: if the energy sheet holds a non-integer UPRN.
        """
        wb = load_workbook(file_stream)

        # Validate the address/UPRN sheet up front so a malformed workbook
        # fails before any asset is built. The lookup itself is not consumed
        # yet: `uprn` below is still a placeholder.
        LbwfParser._map_uprn_to_address(wb)

        rows: Iterator[Tuple[object | None, ...]] = wb[self._ASSETS_SHEET].iter_rows(
            values_only=True
        )
        # An empty sheet yields no rows at all; avoid leaking StopIteration.
        headers = next(rows, None)
        if headers is None:
            return []
        header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers)

        assets: List[LbwfAssetCondition] = []
        for row in rows:
            try:
                install_date: date | None = normalise_date(
                    row[header_indexes["INSTALL DATE"]]
                )
            except ValueError as e:
                logger.error(f"Failed to process LBWF asset due to badly formatted date: {e}")
                continue

            values = {
                field: row[header_indexes[column]]
                for field, column in self._ASSET_COLUMNS.items()
            }
            assets.append(
                LbwfAssetCondition(
                    uprn=0,  # placeholder
                    install_date=install_date,
                    **values,
                )
            )

        return assets

    @staticmethod
    def _map_uprn_to_address(wb: Workbook) -> Dict[str, int | None]:
        """Build an address -> UPRN lookup from the energy breakdown sheet.

        Rows whose address cell is not a string are ignored. A later row with
        the same address overwrites an earlier one.

        Raises:
            KeyError: if the sheet or the "Address"/"UPRN" headers are missing.
            ValueError: if a UPRN cell is neither empty nor an ``int``.
        """
        rows: Iterator[Tuple[object | None, ...]] = wb[
            LbwfParser._ENERGY_SHEET
        ].iter_rows(values_only=True)
        # An empty sheet yields no rows at all; avoid leaking StopIteration.
        headers = next(rows, None)
        if headers is None:
            return {}
        header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers)

        address_idx = header_indexes["Address"]
        uprn_idx = header_indexes["UPRN"]

        mapping: Dict[str, int | None] = {}
        for row in rows:
            address = row[address_idx]
            uprn = row[uprn_idx]

            if not isinstance(address, str):
                continue

            if uprn is not None and not isinstance(uprn, int):
                raise ValueError(f"Unexpected UPRN value: {uprn!r}")

            mapping[address] = uprn

        return mapping

    @staticmethod
    def _get_column_indexes_by_name(
        headers: Tuple[object | None, ...]
    ) -> Dict[str, int]:
        """Return ``{header text: column index}`` for every string header cell.

        Non-string cells (e.g. empty header cells) are skipped. If a header
        text repeats, the right-most column wins.
        """
        return {
            header: i
            for i, header in enumerate(headers)
            if isinstance(header, str)
        }

View file

@ -3,7 +3,7 @@ from datetime import date
@dataclass
class LbwfPropertyCondition:
class LbwfAssetCondition:
uprn: int
prop_ref: int
domna: int

View file

@ -1,4 +1,4 @@
from typing import BinaryIO, List
from typing import Any, BinaryIO, List
from backend.condition.parsing.parser import Parser
from utils.logger import setup_logger
@ -13,3 +13,4 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None:
parser: Parser = select_parser(file_type)
# Orchestration
records: List[Any] = parser.parse(file_stream)

View file

@ -3,10 +3,9 @@ import pytest
from io import BytesIO
from openpyxl import Workbook
from datetime import datetime
import debugpy
from backend.condition.parsing.lbwf_parser import LbwfParser
from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition
from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition
@pytest.fixture
def lbwf_homes_xlsx_bytes() -> BytesIO:
@ -78,7 +77,7 @@ def lbwf_homes_xlsx_bytes() -> BytesIO:
"Source of Data = Joe Bloggs",
])
all_energy_breakdown = wb.create_sheet("All Energy Breakdown")
all_energy_breakdown = wb.create_sheet("All Energy Breakdown ") # Trailing space is intentional; matches source
all_energy_breakdown.append([
"UPRN",
"Organisation Reference",
@ -108,7 +107,6 @@ def lbwf_homes_xlsx_bytes() -> BytesIO:
return stream
def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes):
debugpy.wait_for_client()
# arrange
parser = LbwfParser()
@ -117,4 +115,4 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes):
# assert
assert len(result) == 2
assert isinstance(result[0], LbwfPropertyCondition) # TODO: Improve these asserts
assert isinstance(result[0], LbwfAssetCondition) # TODO: Improve these asserts

View file

@ -0,0 +1,10 @@
from datetime import datetime, date
from typing import Any
def normalise_date(value: Any, allow_none: bool = True) -> date | None:
if value is None and allow_none:
return None
if isinstance(value, datetime):
return value.date()
raise ValueError(f"Unexpected date value: {value!r}")