From 6ff652ff3492222633e118d27619121aa2a65800 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:23:11 +0000 Subject: [PATCH] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .devcontainer/devcontainer.json | 3 + .vscode/launch.json | 15 +++ .vscode/settings.json | 1 + backend/condition/parsing/lbwf_parser.py | 96 ++++++++++++++++++- ...y_condition.py => lbwf_asset_condition.py} | 2 +- backend/condition/processor.py | 3 +- .../tests/parsing/test_lbwf_parser.py | 8 +- backend/condition/utils/date_utils.py | 10 ++ 8 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 .vscode/launch.json rename backend/condition/parsing/records/{lbwf_property_condition.py => lbwf_asset_condition.py} (95%) create mode 100644 backend/condition/utils/date_utils.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 91a76c3d..761786cd 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -27,5 +27,8 @@ "ms-python.vscode-python-envs" ] } + }, + "containerEnv": { + "PYTHONFLAGS": "-Xfrozen_modules=off" } } diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..6b76b4fa --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
from datetime import date
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple

from openpyxl import Workbook, load_workbook
from openpyxl.worksheet.worksheet import Worksheet

from backend.condition.parsing.parser import Parser
from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition
from backend.condition.utils.date_utils import normalise_date
from utils.logger import setup_logger

# The factory must be *called*: binding the function itself means
# `logger.error(...)` raises AttributeError the first time it is used.
# NOTE(review): assumes setup_logger() is callable with no arguments -- confirm
# against utils.logger.
logger = setup_logger()


class LbwfParser(Parser):
    """Parse an LBWF condition workbook into ``LbwfAssetCondition`` records.

    One record is produced per data row of the "Houses Asset Data" sheet.
    Rows whose install date cannot be normalised are logged and skipped.
    """

    _ASSETS_SHEET = "Houses Asset Data"
    # The trailing space is deliberate: it matches the sheet name in the source
    # workbook.
    _ENERGY_SHEET = "All Energy Breakdown "

    _INSTALL_DATE_COLUMN = "INSTALL DATE"

    # Record field name -> column header, for every value that is copied from
    # the row to the record without conversion.
    _FIELD_COLUMNS: Dict[str, str] = {
        "prop_ref": "PROP REF",
        "domna": "Domna",
        "address": "ADDRESS",
        "ownership": "OWNERSHIP",
        "prop_status": "PROP STATUS",
        "prop_type": "PROP TYPE",
        "prop_sub_type": "PROP SUB TYPE",
        "element_group": "ELEMENT GROUP",
        "element_code": "ELEMENT CODE",
        "element_code_description": "ELEMENT CODE DESCRIPTION",
        "attribute_code": "ATTRIBUTE CODE",
        "attribute_code_description": "ATTRIBUTE CODE DESCRIPTION",
        "element_date_value": "ELEMENT DATE VALUE",
        "element_numerical_value": "ELEMENT NUMERIC VALUE",
        "element_text_value": "ELEMENT TEXT VALUE",
        "quantity": "QUANTITY",
        "remaining_life": "REMAINING LIFE",
        "element_comments": "ELEMENT COMMENTS",
    }

    def parse(self, file_stream: BinaryIO) -> List[LbwfAssetCondition]:
        """Read the workbook in ``file_stream`` and return its asset records.

        Args:
            file_stream: Binary stream containing the ``.xlsx`` workbook.

        Returns:
            One ``LbwfAssetCondition`` per data row of the assets sheet, in
            sheet order, excluding rows with an unparseable install date.

        Raises:
            KeyError: If a required sheet or column is missing.
            ValueError: If a required sheet has no header row, or the energy
                sheet contains a UPRN that is neither an int nor empty.
        """
        wb = load_workbook(file_stream)

        # TODO: the address -> UPRN lookup is built (and its UPRN values are
        # validated) but not yet applied; every record still gets the
        # placeholder UPRN below.
        address_to_uprn: Dict[str, int | None] = LbwfParser._map_uprn_to_address(wb)
        logger.debug(f"Loaded {len(address_to_uprn)} LBWF address to UPRN mappings")

        header_indexes, rows = LbwfParser._read_sheet(wb, LbwfParser._ASSETS_SHEET)
        LbwfParser._require_columns(
            header_indexes,
            [LbwfParser._INSTALL_DATE_COLUMN, *LbwfParser._FIELD_COLUMNS.values()],
            LbwfParser._ASSETS_SHEET,
        )

        # Resolve header names to column positions once, not once per row.
        install_date_idx = header_indexes[LbwfParser._INSTALL_DATE_COLUMN]
        field_indexes: Dict[str, int] = {
            field: header_indexes[column]
            for field, column in LbwfParser._FIELD_COLUMNS.items()
        }

        assets: List[LbwfAssetCondition] = []

        for row in rows:
            try:
                install_date: date | None = normalise_date(row[install_date_idx])
            except ValueError as e:
                logger.error(f"Failed to process LBWF asset due to badly formatted date: {e}")
                continue

            assets.append(
                LbwfAssetCondition(
                    uprn=0,  # placeholder
                    install_date=install_date,
                    **{field: row[idx] for field, idx in field_indexes.items()},
                )
            )

        return assets

    @staticmethod
    def _map_uprn_to_address(wb: Workbook) -> Dict[str, int | None]:
        """Build an address -> UPRN lookup from the energy breakdown sheet.

        Rows whose address cell is not a string are ignored. If an address
        appears more than once, the last row wins.

        Raises:
            KeyError: If the sheet or its "Address"/"UPRN" column is missing.
            ValueError: If the sheet has no header row, or a UPRN cell holds
                something other than an int or an empty value.
        """
        header_indexes, rows = LbwfParser._read_sheet(wb, LbwfParser._ENERGY_SHEET)
        LbwfParser._require_columns(
            header_indexes, ["Address", "UPRN"], LbwfParser._ENERGY_SHEET
        )

        address_idx = header_indexes["Address"]
        uprn_idx = header_indexes["UPRN"]

        mapping: Dict[str, int | None] = {}

        for row in rows:
            address = row[address_idx]
            uprn = row[uprn_idx]

            if not isinstance(address, str):
                continue

            if uprn is not None and not isinstance(uprn, int):
                raise ValueError(f"Unexpected UPRN value: {uprn!r}")

            mapping[address] = uprn

        return mapping

    @staticmethod
    def _read_sheet(
        wb: Workbook, sheet_name: str
    ) -> Tuple[Dict[str, int], Iterator[Tuple[object | None, ...]]]:
        """Return the header index and the remaining data rows of a sheet.

        Raises:
            KeyError: If the workbook has no sheet called ``sheet_name``.
            ValueError: If the sheet yields no rows at all, so there is no
                header row to read.
        """
        sheet: Worksheet = wb[sheet_name]
        rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)

        # A default avoids leaking a bare StopIteration when the sheet is empty.
        headers = next(rows, None)
        if headers is None:
            raise ValueError(f"Sheet {sheet_name!r} is empty; expected a header row")

        return LbwfParser._get_column_indexes_by_name(headers), rows

    @staticmethod
    def _require_columns(
        header_indexes: Dict[str, int], columns: List[str], sheet_name: str
    ) -> None:
        """Raise ``KeyError`` naming every column in ``columns`` that is absent."""
        missing = [column for column in columns if column not in header_indexes]
        if missing:
            raise KeyError(
                f"Sheet {sheet_name!r} is missing expected column(s): {missing}"
            )

    @staticmethod
    def _get_column_indexes_by_name(
        headers: Tuple[object | None, ...]
    ) -> Dict[str, int]:
        """Map each string header to its zero-based column position.

        Non-string header cells are skipped. If a header text is repeated, the
        right-most column wins.
        """
        return {
            header: i for i, header in enumerate(headers) if isinstance(header, str)
        }
backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition +from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition @pytest.fixture def lbwf_homes_xlsx_bytes() -> BytesIO: @@ -78,7 +77,7 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: "Source of Data = Joe Bloggs", ]) - all_energy_breakdown = wb.create_sheet("All Energy Breakdown") + all_energy_breakdown = wb.create_sheet("All Energy Breakdown ") # Trailing space is intentional; matches source all_energy_breakdown.append([ "UPRN", "Organisation Reference", @@ -108,7 +107,6 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: return stream def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): - debugpy.wait_for_client() # arrange parser = LbwfParser() @@ -117,4 +115,4 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): # assert assert len(result) == 2 - assert isinstance(result[0], LbwfPropertyCondition) # TODO: Improve these asserts + assert isinstance(result[0], LbwfAssetCondition) # TODO: Improve these asserts diff --git a/backend/condition/utils/date_utils.py b/backend/condition/utils/date_utils.py new file mode 100644 index 00000000..4535acd9 --- /dev/null +++ b/backend/condition/utils/date_utils.py @@ -0,0 +1,10 @@ +from datetime import datetime, date +from typing import Any + + +def normalise_date(value: Any, allow_none: bool = True) -> date | None: + if value is None and allow_none: + return None + if isinstance(value, datetime): + return value.date() + raise ValueError(f"Unexpected date value: {value!r}") \ No newline at end of file