From b808f132a858b9de30f27127c960aa97c6cc46a9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 16 Jan 2026 15:49:02 +0000 Subject: [PATCH 01/20] Update devcontainer and include test packages --- .devcontainer/Dockerfile | 20 +++++++++++++------- .devcontainer/requirements.txt | 4 +++- .vscode/settings.json | 2 ++ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 56c366f4..ccfb55b6 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,4 +1,5 @@ -FROM python:3.12-bullseye +FROM python:3.11.10-bullseye + ARG USER=vscode ARG DEBIAN_FRONTEND=noninteractive @@ -24,12 +25,17 @@ RUN useradd -m -s /usr/bin/bash ${USER} \ && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \ && chmod 0440 /etc/sudoers.d/${USER} -# 4) Python deps -ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 -# Model +# # 4) Python deps - if you want to run assest list +# ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 # ADD asset_list/requirements.txt requirements.txt -# FASTAPI backend -ADD .devcontainer/requirements.txt requirements.txt +# RUN pip install -r requirements.txt + +# +ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 +ADD backend/engine/requirements.txt requirements1.txt +ADD backend/app/requirements/requirements.txt requirements2.txt +ADD .devcontainer/requirements.txt requirements3.txt +RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt RUN pip install -r requirements.txt # 5) Workdir @@ -37,4 +43,4 @@ WORKDIR /workspaces/model # 6) Make Python find your package # Add project root to PYTHONPATH for all processes -ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} +ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} \ No newline at end of file diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt index d8c51f19..3ffebf3e 100644 --- a/.devcontainer/requirements.txt +++ b/.devcontainer/requirements.txt @@ -14,4 +14,6 @@ openpyxl==3.1.2 pytz uvicorn[standard] sqlmodel - +# Testing +pytest==9.0.2 +pytest-cov==7.0.0 \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 27782c10..9a9ea9f8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,6 +9,8 @@ "path": "/bin/bash" } }, + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, // Hot reload setting that needs to be in user settings // "jupyter.runStartupCommands": [ From b1aca16be035d88dd44dda8564a07d44b841e632 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 16 Jan 2026 17:28:28 +0000 Subject: [PATCH 02/20] Define simple local runner --- .devcontainer/requirements.txt | 3 ++- .gitignore | 2 ++ backend/condition/__init__.py | 0 backend/condition/handler.py | 16 +++++++++++++++ backend/condition/ingestion/processor.py | 6 ++++++ backend/condition/local_runner.py | 25 ++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 backend/condition/__init__.py create mode 100644 backend/condition/handler.py create mode 100644 backend/condition/ingestion/processor.py create mode 100644 backend/condition/local_runner.py diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt index 3ffebf3e..300b86b0 100644 --- a/.devcontainer/requirements.txt +++ b/.devcontainer/requirements.txt @@ -16,4 +16,5 @@ uvicorn[standard] sqlmodel # Testing pytest==9.0.2 -pytest-cov==7.0.0 \ No newline at end of file +pytest-cov==7.0.0 +ipykernel>=6.25,<7 \ No newline at end of file diff --git a/.gitignore b/.gitignore index a6538116..625277a5 100644 --- a/.gitignore +++ b/.gitignore @@ -242,6 +242,8 @@ fabric.properties local_data/* /local_data/* etl/epc/local_data/* +/backend/condition/sample_data/lbwf/* +/backend/condition/sample_data/peadody/* *.DS_Store infrastructure/terraform/.terraform* diff --git a/backend/condition/__init__.py b/backend/condition/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/condition/handler.py b/backend/condition/handler.py new file mode 100644 index 00000000..9d26902b --- /dev/null +++ b/backend/condition/handler.py @@ -0,0 +1,16 @@ +from typing import Mapping, Any +from io import BytesIO + +from utils.logger import setup_logger +from ingestion.processor import process_file + + +logger = setup_logger() + +def handler(event: Mapping[str, Any], context: Any) -> None: + # Temporary stub for PoC wiring + dummy_stream = BytesIO(b"") + + source_key = event.get("source_key", "unknown-source") + + process_file(dummy_stream, source_key) \ No newline at end of file diff --git a/backend/condition/ingestion/processor.py b/backend/condition/ingestion/processor.py new file mode 100644 index 00000000..1653f310 --- /dev/null +++ b/backend/condition/ingestion/processor.py @@ -0,0 +1,6 @@ +from typing import BinaryIO, List + +from utils.logger import setup_logger + +def process_file(file_stream: BinaryIO, source_key: str) -> None: + print(f"[processor] Received file: {source_key}") \ No newline at end of file diff --git a/backend/condition/local_runner.py b/backend/condition/local_runner.py new file mode 100644 index 00000000..f27e04dc --- /dev/null +++ b/backend/condition/local_runner.py @@ -0,0 +1,25 @@ +from pathlib import Path + +from ingestion.processor import process_file + +def main() -> None: + try: + # Works in scripts / debugger / pytest + ROOT_DIR = Path(__file__).resolve().parents[1] + except NameError: + # __file__ is not defined in notebooks + ROOT_DIR = Path.cwd() + + path: Path = ROOT_DIR / "condition" / "sample_data" + + lbwf_path: Path = path / "lbwf" / "LBWF - Example Asset Data September 2025.xlsx" # TODO: get this from s3 as part of devcontainer init + + with lbwf_path.open("rb") as f: + process_file( + file_stream=f, + source_key=lbwf_path.as_posix(), + ) + +if __name__ == "__main__": + main() + From e277e270ab9c2a36caf25549a94ea86f6828de30 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 10:13:00 +0000 Subject: [PATCH 03/20] Move processor.py out of ingestion directory --- backend/condition/handler.py | 2 +- backend/condition/local_runner.py | 2 +- backend/condition/{ingestion => }/processor.py | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename backend/condition/{ingestion => }/processor.py (100%) diff --git a/backend/condition/handler.py b/backend/condition/handler.py index 9d26902b..5279b029 100644 --- a/backend/condition/handler.py +++ b/backend/condition/handler.py @@ -2,7 +2,7 @@ from typing import Mapping, Any from io import BytesIO from utils.logger import setup_logger -from ingestion.processor import process_file +from backend.condition.processor import process_file logger = setup_logger() diff --git a/backend/condition/local_runner.py b/backend/condition/local_runner.py index f27e04dc..28f9b06c 100644 --- a/backend/condition/local_runner.py +++ b/backend/condition/local_runner.py @@ -1,6 +1,6 @@ from pathlib import Path -from ingestion.processor import process_file +from backend.condition.processor import process_file def main() -> None: try: diff --git a/backend/condition/ingestion/processor.py b/backend/condition/processor.py similarity index 100% rename from backend/condition/ingestion/processor.py rename to backend/condition/processor.py From c073a4cb431c60384c68ec9a54f7c503a765a5b4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:08:30 +0000 Subject: [PATCH 04/20] =?UTF-8?q?Parser=20factory=20chooses=20parser=20cla?= =?UTF-8?q?ss=20based=20on=20filepath=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/factory.py | 4 ++++ backend/condition/parsing/lbwf_parser.py | 8 ++++++++ backend/condition/parsing/parser.py | 8 ++++++++ backend/condition/processor.py | 8 +++++++- .../condition/tests/parsing/test_parsing_factory.py | 12 ++++++++++++ pytest.ini | 2 +- 6 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 backend/condition/parsing/factory.py create mode 100644 backend/condition/parsing/lbwf_parser.py create mode 100644 backend/condition/parsing/parser.py create mode 100644 backend/condition/tests/parsing/test_parsing_factory.py diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py new file mode 100644 index 00000000..55b46253 --- /dev/null +++ b/backend/condition/parsing/factory.py @@ -0,0 +1,4 @@ +from backend.condition.parsing.parser import Parser + +def select_parser(filepath: str) -> Parser: + raise NotImplementedError \ No newline at end of file diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py new file mode 100644 index 00000000..b0c233d3 --- /dev/null +++ b/backend/condition/parsing/lbwf_parser.py @@ -0,0 +1,8 @@ +from typing import BinaryIO, Any + +from backend.condition.parsing.parser import Parser + +class LbwfParser(Parser): + + def parse(self, file_stream: BinaryIO) -> Any: + raise NotImplementedError \ No newline at end of file diff --git a/backend/condition/parsing/parser.py b/backend/condition/parsing/parser.py new file mode 100644 index 00000000..105fda36 --- /dev/null +++ b/backend/condition/parsing/parser.py @@ -0,0 +1,8 @@ +from abc import ABC, abstractmethod +from typing import BinaryIO, Any + +class Parser(ABC): + + @abstractmethod + def parse(self, file_stream: BinaryIO) -> Any: + pass \ No newline at end of file diff --git a/backend/condition/processor.py b/backend/condition/processor.py index 1653f310..82f1b92e 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -3,4 +3,10 @@ from typing import BinaryIO, List from utils.logger import setup_logger def process_file(file_stream: BinaryIO, source_key: str) -> None: - print(f"[processor] Received file: {source_key}") \ No newline at end of file + print(f"[processor] Received file: {source_key}") + + # Instantiation + + + # Orchestration + diff --git a/backend/condition/tests/parsing/test_parsing_factory.py b/backend/condition/tests/parsing/test_parsing_factory.py new file mode 100644 index 00000000..dc2949f0 --- /dev/null +++ b/backend/condition/tests/parsing/test_parsing_factory.py @@ -0,0 +1,12 @@ +from backend.condition.parsing.factory import select_parser + +def test_selects_lbwf_parser(): + # arrange + file_path_str = "uploads/lbwf/Example Asset Data.xlsx" + expected_class_name = "LbwfParser" + + # act + actual_class_name = select_parser(file_path_str).__class__.__name__ + + # assert + assert expected_class_name == actual_class_name \ No newline at end of file diff --git a/pytest.ini b/pytest.ini index 84c686b1..1422657b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] pythonpath = . addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests From 4d36fce83d56a193a89b28196814870f91cc2644 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:13:14 +0000 Subject: [PATCH 05/20] =?UTF-8?q?Parser=20factory=20chooses=20parser=20cla?= =?UTF-8?q?ss=20based=20on=20filepath=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/factory.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py index 55b46253..c2963079 100644 --- a/backend/condition/parsing/factory.py +++ b/backend/condition/parsing/factory.py @@ -1,4 +1,8 @@ from backend.condition.parsing.parser import Parser +from backend.condition.parsing.lbwf_parser import LbwfParser def select_parser(filepath: str) -> Parser: - raise NotImplementedError \ No newline at end of file + path = filepath.lower() + + if "lbwf" in path: + return LbwfParser() \ No newline at end of file From 9244689e76668f9a499b3e9fef3736199b089f52 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:17:15 +0000 Subject: [PATCH 06/20] =?UTF-8?q?Parser=20factory=20raises=20value=20error?= =?UTF-8?q?=20on=20unknown=20file=20path=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../condition/tests/parsing/test_parsing_factory.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/condition/tests/parsing/test_parsing_factory.py b/backend/condition/tests/parsing/test_parsing_factory.py index dc2949f0..4e373a12 100644 --- a/backend/condition/tests/parsing/test_parsing_factory.py +++ b/backend/condition/tests/parsing/test_parsing_factory.py @@ -1,3 +1,5 @@ +import pytest + from backend.condition.parsing.factory import select_parser def test_selects_lbwf_parser(): @@ -9,4 +11,12 @@ def test_selects_lbwf_parser(): actual_class_name = select_parser(file_path_str).__class__.__name__ # assert - assert expected_class_name == actual_class_name \ No newline at end of file + assert expected_class_name == actual_class_name + +def test_unknown_filepath_raises_value_error(): + # arrange + file_path_str = "unkown/Example Asset Data.xlsx" + + # act + assert + with pytest.raises(ValueError): + select_parser(file_path_str) \ No newline at end of file From 6fd4b19e886bc962fe4d119590c2b2e4fdd6d6e2 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:18:15 +0000 Subject: [PATCH 07/20] =?UTF-8?q?Parser=20factory=20raises=20value=20error?= =?UTF-8?q?=20on=20unknown=20file=20path=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/factory.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py index c2963079..7db8383f 100644 --- a/backend/condition/parsing/factory.py +++ b/backend/condition/parsing/factory.py @@ -5,4 +5,6 @@ def select_parser(filepath: str) -> Parser: path = filepath.lower() if "lbwf" in path: - return LbwfParser() \ No newline at end of file + return LbwfParser() + + raise ValueError("Unrecognised file path, unable to instantiate Parser") \ No newline at end of file From 049a93fa26358210c0c12aaecc3f1812077af806 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 11:47:32 +0000 Subject: [PATCH 08/20] =?UTF-8?q?Create=20FileType=20enum=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/file_type.py | 12 ++++++++++ backend/condition/parsing/factory.py | 9 ++++---- backend/condition/processor.py | 2 +- .../tests/parsing/test_parsing_factory.py | 15 ++++--------- .../condition/tests/test_detect_file_type.py | 22 +++++++++++++++++++ 5 files changed, 43 insertions(+), 17 deletions(-) create mode 100644 backend/condition/file_type.py create mode 100644 backend/condition/tests/test_detect_file_type.py diff --git a/backend/condition/file_type.py b/backend/condition/file_type.py new file mode 100644 index 00000000..b9a4357f --- /dev/null +++ b/backend/condition/file_type.py @@ -0,0 +1,12 @@ +from enum import Enum + +class FileType(Enum): + LBWF = "lbwf" + +def detect_file_type(filepath: str) -> FileType: + path = filepath.lower() + + if "lbwf" in path: + return FileType.LBWF + + raise ValueError("Unrecognised file path") \ No newline at end of file diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py index 7db8383f..01dce75d 100644 --- a/backend/condition/parsing/factory.py +++ b/backend/condition/parsing/factory.py @@ -1,10 +1,9 @@ +from backend.condition.file_type import FileType from backend.condition.parsing.parser import Parser from backend.condition.parsing.lbwf_parser import LbwfParser -def select_parser(filepath: str) -> Parser: - path = filepath.lower() - - if "lbwf" in path: +def select_parser(file_type: FileType) -> Parser: + if file_type is FileType.LBWF: return LbwfParser() - raise ValueError("Unrecognised file path, unable to instantiate Parser") \ No newline at end of file + raise ValueError("Unrecognised file type, unable to instantiate Parser") diff --git a/backend/condition/processor.py b/backend/condition/processor.py index 82f1b92e..c4dcabc2 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,6 +1,7 @@ from typing import BinaryIO, List from utils.logger import setup_logger +from backend.condition.file_type import FileType def process_file(file_stream: BinaryIO, source_key: str) -> None: print(f"[processor] Received file: {source_key}") @@ -9,4 +10,3 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None: # Orchestration - diff --git a/backend/condition/tests/parsing/test_parsing_factory.py b/backend/condition/tests/parsing/test_parsing_factory.py index 4e373a12..481418d7 100644 --- a/backend/condition/tests/parsing/test_parsing_factory.py +++ b/backend/condition/tests/parsing/test_parsing_factory.py @@ -1,22 +1,15 @@ import pytest from backend.condition.parsing.factory import select_parser +from backend.condition.file_type import FileType def test_selects_lbwf_parser(): # arrange - file_path_str = "uploads/lbwf/Example Asset Data.xlsx" + file_type = FileType.LBWF expected_class_name = "LbwfParser" # act - actual_class_name = select_parser(file_path_str).__class__.__name__ + actual_class_name = select_parser(file_type).__class__.__name__ # assert - assert expected_class_name == actual_class_name - -def test_unknown_filepath_raises_value_error(): - # arrange - file_path_str = "unkown/Example Asset Data.xlsx" - - # act + assert - with pytest.raises(ValueError): - select_parser(file_path_str) \ No newline at end of file + assert expected_class_name == actual_class_name \ No newline at end of file diff --git a/backend/condition/tests/test_detect_file_type.py b/backend/condition/tests/test_detect_file_type.py new file mode 100644 index 00000000..fecf22c1 --- /dev/null +++ b/backend/condition/tests/test_detect_file_type.py @@ -0,0 +1,22 @@ +import pytest + +from backend.condition.file_type import FileType, detect_file_type + +def test_detects_lbwf_file_type(): + # arrange + file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx" + expected_file_type = FileType.LBWF + + # act + actual_file_type: FileType = detect_file_type(file_path_str) + + # assert + assert expected_file_type == actual_file_type + +def test_unknown_filepath_raises_value_error(): + # arrange + file_path_str = "unknown/Example Asset Data.xlsx" + + # act + assert + with pytest.raises(ValueError): + detect_file_type(file_path_str) \ No newline at end of file From 00a707500ee86e58f1b02c219af50c2a45439174 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 14:40:35 +0000 Subject: [PATCH 09/20] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 1 + .../records/lbwf_property_condition.py | 27 +++++ backend/condition/processor.py | 7 +- .../tests/parsing/test_lbwf_parser.py | 98 +++++++++++++++++++ 4 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 backend/condition/parsing/records/lbwf_property_condition.py create mode 100644 backend/condition/tests/parsing/test_lbwf_parser.py diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index b0c233d3..7404189f 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,6 +1,7 @@ from typing import BinaryIO, Any from backend.condition.parsing.parser import Parser +from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition class LbwfParser(Parser): diff --git a/backend/condition/parsing/records/lbwf_property_condition.py b/backend/condition/parsing/records/lbwf_property_condition.py new file mode 100644 index 00000000..1ecd00d6 --- /dev/null +++ b/backend/condition/parsing/records/lbwf_property_condition.py @@ -0,0 +1,27 @@ +from dataclasses import dataclass +from datetime import date + + +@dataclass +class LbwfPropertyCondition: + uprn: int + prop_ref: int + domna: int + address: str + ownership: str + prop_status: str + prop_type: str # TODO: make this enum? + prop_sub_type: str # TODO: make this enum? + element_group: str + element_code: str + element_code_description: str + attribute_code: str + attribute_code_description: str + element_date_value: str | None = None + element_numerical_value: int | None = None + element_text_value: str | None = None + quantity: int | None = None + install_date: date | None = None + remaining_life: int | None = None + element_comments: str | None = None + diff --git a/backend/condition/processor.py b/backend/condition/processor.py index c4dcabc2..f19c4257 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,12 +1,15 @@ from typing import BinaryIO, List +from backend.condition.parsing.parser import Parser from utils.logger import setup_logger -from backend.condition.file_type import FileType +from backend.condition.file_type import FileType, detect_file_type +from backend.condition.parsing.factory import select_parser def process_file(file_stream: BinaryIO, source_key: str) -> None: print(f"[processor] Received file: {source_key}") # Instantiation - + file_type: FileType = detect_file_type(source_key) + parser: Parser = select_parser(file_type) # Orchestration diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py new file mode 100644 index 00000000..4c5be5cd --- /dev/null +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -0,0 +1,98 @@ +from typing import Any +import pytest +from io import BytesIO +from openpyxl import Workbook +from datetime import datetime +import debugpy + +from backend.condition.parsing.lbwf_parser import LbwfParser +from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition + +@pytest.fixture +def lbwf_homes_xlsx_bytes() -> BytesIO: + wb = Workbook() + houses_asset_data = wb.active + houses_asset_data.title = "Houses Asset Data" + houses_asset_data.append([ + "PROP REF", + "Domna", + "ADDRESS", + "OWNERSHIP", + "PROP STATUS", + "PROP TYPE", + "PROP SUB TYPE", + "ELEMENT GROUP", + "ELEMENT CODE", + "ELEMENT CODE DESCRIPTION", + "ATTRIBUTE CODE", + "ATTRIBUTE CODE DESCRIPTION", + "ELEMENT DATE VALUE", + "ELEMENT NUMERIC VALUE", + "ELEMENT TEXT VALUE", + "QUANTITY", + "INSTALL DATE", + "REMAINING LIFE", + "ELEMENT COMMENTS" + ] + ) + + houses_asset_data.append([ + 12345, + 12345, + "123 Fake Street", + "LBWF_OWNED", + "OCCP", + "HOU", + "TERRACED", + "ASSETS", + "AHR_CAT", + "Accessible Housing Register Category", + "F", + "General Needs", + None, + None, + None, + 1, + None, + None, + None, + ]) + houses_asset_data.append([ + 54321, + 54321, + "100 Random Road", + "LBWF_OWNED", + "OCCP", + "HOU", + "EOT", + "ASSETS", + "INTSMKDET", + "Smoke Detectors in Property", + "HARDWRDMNS", + "Hard Wired Mains Smoke Alarm in Property", + None, + None, + None, + 2, + datetime(2019,4,1), + 4, + "Source of Data = Joe Bloggs", + ]) + + stream = BytesIO() + wb.save(stream) + stream.seek(0) + + return stream + +def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): + debugpy.wait_for_client() + # arrange + parser = LbwfParser() + + # act + result: Any = parser.parse(lbwf_homes_xlsx_bytes) + + # assert + assert len(result) == 2 + assert isinstance(result[0], LbwfPropertyCondition) \ No newline at end of file From 60f3f0c2227a39a076b5631f5d1a5604ebd0b9ba Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 14:40:35 +0000 Subject: [PATCH 10/20] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 1 + .../records/lbwf_property_condition.py | 27 ++++ backend/condition/processor.py | 7 +- .../tests/parsing/test_lbwf_parser.py | 120 ++++++++++++++++++ 4 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 backend/condition/parsing/records/lbwf_property_condition.py create mode 100644 backend/condition/tests/parsing/test_lbwf_parser.py diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index b0c233d3..7404189f 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,6 +1,7 @@ from typing import BinaryIO, Any from backend.condition.parsing.parser import Parser +from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition class LbwfParser(Parser): diff --git a/backend/condition/parsing/records/lbwf_property_condition.py b/backend/condition/parsing/records/lbwf_property_condition.py new file mode 100644 index 00000000..1ecd00d6 --- /dev/null +++ b/backend/condition/parsing/records/lbwf_property_condition.py @@ -0,0 +1,27 @@ +from dataclasses import dataclass +from datetime import date + + +@dataclass +class LbwfPropertyCondition: + uprn: int + prop_ref: int + domna: int + address: str + ownership: str + prop_status: str + prop_type: str # TODO: make this enum? + prop_sub_type: str # TODO: make this enum? + element_group: str + element_code: str + element_code_description: str + attribute_code: str + attribute_code_description: str + element_date_value: str | None = None + element_numerical_value: int | None = None + element_text_value: str | None = None + quantity: int | None = None + install_date: date | None = None + remaining_life: int | None = None + element_comments: str | None = None + diff --git a/backend/condition/processor.py b/backend/condition/processor.py index c4dcabc2..f19c4257 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,12 +1,15 @@ from typing import BinaryIO, List +from backend.condition.parsing.parser import Parser from utils.logger import setup_logger -from backend.condition.file_type import FileType +from backend.condition.file_type import FileType, detect_file_type +from backend.condition.parsing.factory import select_parser def process_file(file_stream: BinaryIO, source_key: str) -> None: print(f"[processor] Received file: {source_key}") # Instantiation - + file_type: FileType = detect_file_type(source_key) + parser: Parser = select_parser(file_type) # Orchestration diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py new file mode 100644 index 00000000..f7a7d162 --- /dev/null +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -0,0 +1,120 @@ +from typing import Any +import pytest +from io import BytesIO +from openpyxl import Workbook +from datetime import datetime +import debugpy + +from backend.condition.parsing.lbwf_parser import LbwfParser +from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition + +@pytest.fixture +def lbwf_homes_xlsx_bytes() -> BytesIO: + wb = Workbook() + houses_asset_data = wb.active + houses_asset_data.title = "Houses Asset Data" + houses_asset_data.append([ + "PROP REF", + "Domna", + "ADDRESS", + "OWNERSHIP", + "PROP STATUS", + "PROP TYPE", + "PROP SUB TYPE", + "ELEMENT GROUP", + "ELEMENT CODE", + "ELEMENT CODE DESCRIPTION", + "ATTRIBUTE CODE", + "ATTRIBUTE CODE DESCRIPTION", + "ELEMENT DATE VALUE", + "ELEMENT NUMERIC VALUE", + "ELEMENT TEXT VALUE", + "QUANTITY", + "INSTALL DATE", + "REMAINING LIFE", + "ELEMENT COMMENTS" + ] + ) + houses_asset_data.append([ + 12345, + 12345, + "123 Fake Street, London, A10 1AB", + "LBWF_OWNED", + "OCCP", + "HOU", + "TERRACED", + "ASSETS", + "AHR_CAT", + "Accessible Housing Register Category", + "F", + "General Needs", + None, + None, + None, + 1, + None, + None, + None, + ]) + houses_asset_data.append([ + 54321, + 54321, + "100 Random Road, London, A10 1AB", + "LBWF_OWNED", + "OCCP", + "HOU", + "EOT", + "ASSETS", + "INTSMKDET", + "Smoke Detectors in Property", + "HARDWRDMNS", + "Hard Wired Mains Smoke Alarm in Property", + None, + None, + None, + 2, + datetime(2019,4,1), + 4, + "Source of Data = Joe Bloggs", + ]) + + all_energy_breakdown = wb.create_sheet("All Energy Breakdown") + all_energy_breakdown.append([ + "UPRN", + "Organisation Reference", + "Alternate Organisation Reference", + "Address", + "Postcode" + ]) + all_energy_breakdown.append([ + 1, + 100, + 101, + "100 RANDOM ROAD", + "A10 1AB" + ]) + all_energy_breakdown.append([ + 2, + 200, + None, + "123 FAKE STREET", + "A10 1AB" + ]) + + stream = BytesIO() + wb.save(stream) + stream.seek(0) + + return stream + +def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): + debugpy.wait_for_client() + # arrange + parser = LbwfParser() + + # act + result: Any = parser.parse(lbwf_homes_xlsx_bytes) + + # assert + assert len(result) == 2 + assert isinstance(result[0], LbwfPropertyCondition) # TODO: Improve these asserts \ No newline at end of file From 6ff652ff3492222633e118d27619121aa2a65800 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:23:11 +0000 Subject: [PATCH 11/20] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .devcontainer/devcontainer.json | 3 + .vscode/launch.json | 15 +++ .vscode/settings.json | 1 + backend/condition/parsing/lbwf_parser.py | 96 ++++++++++++++++++- ...y_condition.py => lbwf_asset_condition.py} | 2 +- backend/condition/processor.py | 3 +- .../tests/parsing/test_lbwf_parser.py | 8 +- backend/condition/utils/date_utils.py | 10 ++ 8 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 .vscode/launch.json rename backend/condition/parsing/records/{lbwf_property_condition.py => lbwf_asset_condition.py} (95%) create mode 100644 backend/condition/utils/date_utils.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 91a76c3d..761786cd 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -27,5 +27,8 @@ "ms-python.vscode-python-envs" ] } + }, + "containerEnv": { + "PYTHONFLAGS": "-Xfrozen_modules=off" } } diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..6b76b4fa --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 9a9ea9f8..88c2ae2d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,7 @@ }, "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, + "python.testing.pytestArgs": ["-s", "-q", "--no-cov"] // Hot reload setting that needs to be in user settings // "jupyter.runStartupCommands": [ diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 7404189f..fd65e24a 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,9 +1,99 @@ -from typing import BinaryIO, Any +from typing import BinaryIO, Any, Dict, Iterator, List, Tuple +from openpyxl import Workbook, load_workbook +from datetime import date from backend.condition.parsing.parser import Parser -from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition +from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.utils.date_utils import normalise_date +from utils.logger import setup_logger + +logger = setup_logger class LbwfParser(Parser): def parse(self, file_stream: BinaryIO) -> Any: - raise NotImplementedError \ No newline at end of file + wb = load_workbook(file_stream) + urn_to_address_map: Dict[str, int] = LbwfParser._map_uprn_to_address(wb) + print(urn_to_address_map) + + assets_sheet: Workbook = wb["Houses Asset Data"] + rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True) + headers = next(rows) + header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers) + + assets: List[LbwfAssetCondition] = [] + + for row in rows: + try: + install_date: date = normalise_date(row[header_indexes["INSTALL DATE"]]) + except ValueError as e: + logger.error(f"Failed to process LBWF asset due to badly formatted date: {e}") + continue + + assets.append(LbwfAssetCondition( + uprn=0, #placeholder + prop_ref=row[header_indexes["PROP REF"]], + domna=row[header_indexes["Domna"]], + address=row[header_indexes["ADDRESS"]], + ownership=row[header_indexes["OWNERSHIP"]], + prop_status=row[header_indexes["PROP STATUS"]], + prop_type=row[header_indexes["PROP TYPE"]], + prop_sub_type=row[header_indexes["PROP SUB TYPE"]], + element_group=row[header_indexes["ELEMENT GROUP"]], + element_code=row[header_indexes["ELEMENT CODE"]], + element_code_description=row[header_indexes["ELEMENT CODE DESCRIPTION"]], + attribute_code=row[header_indexes["ATTRIBUTE CODE"]], + attribute_code_description=row[header_indexes["ATTRIBUTE CODE DESCRIPTION"]], + element_date_value=row[header_indexes["ELEMENT DATE VALUE"]], + element_numerical_value=row[header_indexes["ELEMENT NUMERIC VALUE"]], + element_text_value=row[header_indexes["ELEMENT TEXT VALUE"]], + quantity=row[header_indexes["QUANTITY"]], + install_date=install_date, + remaining_life=row[header_indexes["REMAINING LIFE"]], + element_comments=row[header_indexes["ELEMENT COMMENTS"]], + )) + + return assets + + + @staticmethod + def _map_uprn_to_address(wb: Workbook) -> Dict[str, int | None]: + print(wb.sheetnames) + sheet: Workbook = wb["All Energy Breakdown "] + + rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True) + + headers = next(rows) + header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers) + + address_idx = header_indexes["Address"] + uprn_idx = header_indexes["UPRN"] + + mapping: Dict[str, int | None] = {} + + for row in rows: + address = row[address_idx] + uprn = row[uprn_idx] + + if not isinstance(address, str): + continue + + if uprn is not None and not isinstance(uprn, int): + raise ValueError(f"Unexpected UPRN value: {uprn!r}") + + mapping[address] = uprn + + return mapping + + + def _get_column_indexes_by_name( + headers: Tuple[object | None, ...] + ) -> Dict[str, int]: + index: Dict[str, int] = {} + + for i, header in enumerate(headers): + if isinstance(header, str): + index[header] = i + + return index + diff --git a/backend/condition/parsing/records/lbwf_property_condition.py b/backend/condition/parsing/records/lbwf_asset_condition.py similarity index 95% rename from backend/condition/parsing/records/lbwf_property_condition.py rename to backend/condition/parsing/records/lbwf_asset_condition.py index 1ecd00d6..3955350b 100644 --- a/backend/condition/parsing/records/lbwf_property_condition.py +++ b/backend/condition/parsing/records/lbwf_asset_condition.py @@ -3,7 +3,7 @@ from datetime import date @dataclass -class LbwfPropertyCondition: +class LbwfAssetCondition: uprn: int prop_ref: int domna: int diff --git a/backend/condition/processor.py b/backend/condition/processor.py index f19c4257..3939ba08 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,4 +1,4 @@ -from typing import BinaryIO, List +from typing import Any, BinaryIO, List from backend.condition.parsing.parser import Parser from utils.logger import setup_logger @@ -13,3 +13,4 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None: parser: Parser = select_parser(file_type) # Orchestration + records: List[Any] = parser.parse(file_stream) \ No newline at end of file diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index d9ed1e90..6a93979a 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -3,10 +3,9 @@ import pytest from io import BytesIO from openpyxl import Workbook from datetime import datetime -import debugpy from backend.condition.parsing.lbwf_parser import LbwfParser -from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition +from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition @pytest.fixture def lbwf_homes_xlsx_bytes() -> BytesIO: @@ -78,7 +77,7 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: "Source of Data = Joe Bloggs", ]) - all_energy_breakdown = wb.create_sheet("All Energy Breakdown") + all_energy_breakdown = wb.create_sheet("All Energy Breakdown ") # Trailing space is intentional; matches source all_energy_breakdown.append([ "UPRN", "Organisation Reference", @@ -108,7 +107,6 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: return stream def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): - debugpy.wait_for_client() # arrange parser = LbwfParser() @@ -117,4 +115,4 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): # assert assert len(result) == 2 - assert isinstance(result[0], LbwfPropertyCondition) # TODO: Improve these asserts + assert isinstance(result[0], LbwfAssetCondition) # TODO: Improve these asserts diff --git a/backend/condition/utils/date_utils.py b/backend/condition/utils/date_utils.py new file mode 100644 index 00000000..4535acd9 --- /dev/null +++ b/backend/condition/utils/date_utils.py @@ -0,0 +1,10 @@ +from datetime import datetime, date +from typing import Any + + +def normalise_date(value: Any, allow_none: bool = True) -> date | None: + if value is None and allow_none: + return None + if isinstance(value, datetime): + return value.date() + raise ValueError(f"Unexpected date value: {value!r}") \ No newline at end of file From 4553e9937bb8ee10dcb61fd86647f48c8695d5cb Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:28:51 +0000 Subject: [PATCH 12/20] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 54 ++++++++++++------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index fd65e24a..2d6463c9 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -25,36 +25,36 @@ class LbwfParser(Parser): for row in rows: try: - install_date: date = normalise_date(row[header_indexes["INSTALL DATE"]]) - except ValueError as e: - logger.error(f"Failed to process LBWF asset due to badly formatted date: {e}") - continue + assets.append(LbwfParser._map_row_to_asset_record(row, header_indexes)) + except Exception as e: + logger.error(f"Error mapping LBWF row to asset record: {e}") - assets.append(LbwfAssetCondition( - uprn=0, #placeholder - prop_ref=row[header_indexes["PROP REF"]], - domna=row[header_indexes["Domna"]], - address=row[header_indexes["ADDRESS"]], - ownership=row[header_indexes["OWNERSHIP"]], - prop_status=row[header_indexes["PROP STATUS"]], - prop_type=row[header_indexes["PROP TYPE"]], - prop_sub_type=row[header_indexes["PROP SUB TYPE"]], - element_group=row[header_indexes["ELEMENT GROUP"]], - element_code=row[header_indexes["ELEMENT CODE"]], - element_code_description=row[header_indexes["ELEMENT CODE DESCRIPTION"]], - attribute_code=row[header_indexes["ATTRIBUTE CODE"]], - attribute_code_description=row[header_indexes["ATTRIBUTE CODE DESCRIPTION"]], - element_date_value=row[header_indexes["ELEMENT DATE VALUE"]], - element_numerical_value=row[header_indexes["ELEMENT NUMERIC VALUE"]], - element_text_value=row[header_indexes["ELEMENT TEXT VALUE"]], - quantity=row[header_indexes["QUANTITY"]], - install_date=install_date, - remaining_life=row[header_indexes["REMAINING LIFE"]], - element_comments=row[header_indexes["ELEMENT COMMENTS"]], - )) - return assets + @staticmethod + def _map_row_to_asset_record(row: Any | Tuple[object | None, ...], header_indexes: Dict[str, int]) -> LbwfAssetCondition: + return LbwfAssetCondition( + uprn=0, #placeholder + prop_ref=row[header_indexes["PROP REF"]], + domna=row[header_indexes["Domna"]], + address=row[header_indexes["ADDRESS"]], + ownership=row[header_indexes["OWNERSHIP"]], + prop_status=row[header_indexes["PROP STATUS"]], + prop_type=row[header_indexes["PROP TYPE"]], + prop_sub_type=row[header_indexes["PROP SUB TYPE"]], + element_group=row[header_indexes["ELEMENT GROUP"]], + element_code=row[header_indexes["ELEMENT CODE"]], + element_code_description=row[header_indexes["ELEMENT CODE DESCRIPTION"]], + attribute_code=row[header_indexes["ATTRIBUTE CODE"]], + attribute_code_description=row[header_indexes["ATTRIBUTE CODE DESCRIPTION"]], + element_date_value=row[header_indexes["ELEMENT DATE VALUE"]], + element_numerical_value=row[header_indexes["ELEMENT NUMERIC VALUE"]], + element_text_value=row[header_indexes["ELEMENT TEXT VALUE"]], + quantity=row[header_indexes["QUANTITY"]], + install_date=normalise_date(row[header_indexes["INSTALL DATE"]]), + remaining_life=row[header_indexes["REMAINING LIFE"]], + element_comments=row[header_indexes["ELEMENT COMMENTS"]], + ) @staticmethod def _map_uprn_to_address(wb: Workbook) -> Dict[str, int | None]: From 330580c7750641f1aac7b866791948b8db774537 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:32:16 +0000 Subject: [PATCH 13/20] =?UTF-8?q?map=20correct=20uprn=20when=20parsing=20a?= =?UTF-8?q?sset=20conditions=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/tests/parsing/test_lbwf_parser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index 6a93979a..e962f229 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -114,5 +114,8 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): result: Any = parser.parse(lbwf_homes_xlsx_bytes) # assert + # TODO: Improve these asserts assert len(result) == 2 - assert isinstance(result[0], LbwfAssetCondition) # TODO: Improve these asserts + assert isinstance(result[0], LbwfAssetCondition) + assert result[0].uprn == 1 + assert result[1].uprn == 2 \ No newline at end of file From 12bbd1a4feb7301a8ea93507c47ddbf75ac1343f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:41:13 +0000 Subject: [PATCH 14/20] handle dates as strings --- backend/condition/utils/date_utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/backend/condition/utils/date_utils.py b/backend/condition/utils/date_utils.py index 4535acd9..713d151c 100644 --- a/backend/condition/utils/date_utils.py +++ b/backend/condition/utils/date_utils.py @@ -5,6 +5,14 @@ from typing import Any def normalise_date(value: Any, allow_none: bool = True) -> date | None: if value is None and allow_none: return None + if isinstance(value, datetime): return value.date() + + if isinstance(value, str): + try: + return datetime.strptime(value.strip(), "%d/%m/%Y").date() + except ValueError as exc: + raise ValueError(f"Invalid date string: {value!r}") from exc + raise ValueError(f"Unexpected date value: {value!r}") \ No newline at end of file From 13daa62bdf9bc25a7538186b8bd047e231a5c75a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 19 Jan 2026 16:51:18 +0000 Subject: [PATCH 15/20] =?UTF-8?q?map=20correct=20uprn=20when=20parsing=20a?= =?UTF-8?q?sset=20conditions=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 29 ++++++++++++++----- .../tests/parsing/test_lbwf_parser.py | 12 ++++---- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 2d6463c9..293311b8 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -13,8 +13,7 @@ class LbwfParser(Parser): def parse(self, file_stream: BinaryIO) -> Any: wb = load_workbook(file_stream) - urn_to_address_map: Dict[str, int] = LbwfParser._map_uprn_to_address(wb) - print(urn_to_address_map) + address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(wb) assets_sheet: Workbook = wb["Houses Asset Data"] rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True) @@ -25,19 +24,26 @@ class LbwfParser(Parser): for row in rows: try: - assets.append(LbwfParser._map_row_to_asset_record(row, header_indexes)) + assets.append(LbwfParser._map_row_to_asset_record(row, header_indexes, address_to_uprn_map)) except Exception as e: logger.error(f"Error mapping LBWF row to asset record: {e}") + print(assets) return assets @staticmethod - def _map_row_to_asset_record(row: Any | Tuple[object | None, ...], header_indexes: Dict[str, int]) -> LbwfAssetCondition: + def _map_row_to_asset_record( + row: Any | Tuple[object | None, ...], + header_indexes: Dict[str, int], + address_to_uprn_map: Dict[str, int] + ) -> LbwfAssetCondition: + address: str = row[header_indexes["ADDRESS"]] + return LbwfAssetCondition( - uprn=0, #placeholder + uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map), prop_ref=row[header_indexes["PROP REF"]], domna=row[header_indexes["Domna"]], - address=row[header_indexes["ADDRESS"]], + address=address, ownership=row[header_indexes["OWNERSHIP"]], prop_status=row[header_indexes["PROP STATUS"]], prop_type=row[header_indexes["PROP TYPE"]], @@ -57,8 +63,7 @@ class LbwfParser(Parser): ) @staticmethod - def _map_uprn_to_address(wb: Workbook) -> Dict[str, int | None]: - print(wb.sheetnames) + def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]: sheet: Workbook = wb["All Energy Breakdown "] rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True) @@ -96,4 +101,12 @@ class LbwfParser(Parser): index[header] = i return index + + def _get_uprn_from_address(address: str, address_to_uprn_map: Dict[str, int]) -> int | None: + pseudo_name = address.split(",")[0] + + if pseudo_name.lower() in (k.lower() for k in address_to_uprn_map.keys()): + return address_to_uprn_map[pseudo_name.upper()] + + return None diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index e962f229..412c4c17 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -87,16 +87,16 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: ]) all_energy_breakdown.append([ 1, - 100, - 101, - "100 RANDOM ROAD", + 200, + None, + "123 FAKE STREET", "A10 1AB" ]) all_energy_breakdown.append([ 2, - 200, - None, - "123 FAKE STREET", + 100, + 101, + "100 RANDOM ROAD", "A10 1AB" ]) From dce8442fff6d42ff6118a9ac80ec5cc1ba02d716 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 09:34:23 +0000 Subject: [PATCH 16/20] =?UTF-8?q?Parse=20Houses=20sheet=20as=20well=20?= =?UTF-8?q?=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 2 +- .../records/{ => lbwf}/lbwf_asset_condition.py | 0 .../condition/parsing/records/lbwf/lbwf_house.py | 15 +++++++++++++++ .../condition/tests/parsing/test_lbwf_parser.py | 9 +++++++-- 4 files changed, 23 insertions(+), 3 deletions(-) rename backend/condition/parsing/records/{ => lbwf}/lbwf_asset_condition.py (100%) create mode 100644 backend/condition/parsing/records/lbwf/lbwf_house.py diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 293311b8..2cd34ae4 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -3,7 +3,7 @@ from openpyxl import Workbook, load_workbook from datetime import date from backend.condition.parsing.parser import Parser -from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition from backend.condition.utils.date_utils import normalise_date from utils.logger import setup_logger diff --git a/backend/condition/parsing/records/lbwf_asset_condition.py b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py similarity index 100% rename from backend/condition/parsing/records/lbwf_asset_condition.py rename to backend/condition/parsing/records/lbwf/lbwf_asset_condition.py diff --git a/backend/condition/parsing/records/lbwf/lbwf_house.py b/backend/condition/parsing/records/lbwf/lbwf_house.py new file mode 100644 index 00000000..6db16862 --- /dev/null +++ b/backend/condition/parsing/records/lbwf/lbwf_house.py @@ -0,0 +1,15 @@ +from dataclasses import dataclass +from typing import List + +from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition + +@dataclass +class LbwfHouse: + uprn: int + reference: int + address: str + epc: str # TODO: make enum + shdf: bool + house: str + fail_decency: int + assets: List[LbwfAssetCondition] \ No newline at end of file diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index 412c4c17..dfa1403e 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -5,7 +5,7 @@ from openpyxl import Workbook from datetime import datetime from backend.condition.parsing.lbwf_parser import LbwfParser -from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse @pytest.fixture def lbwf_homes_xlsx_bytes() -> BytesIO: @@ -77,6 +77,11 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: "Source of Data = Joe Bloggs", ]) + houses = wb.create_sheet("Houses") + houses.append(["Reference", "Address", "EPC", "SHDF", "HOSUE", "Fail Decency"]) + houses.append([12345, "123 Fake Street, London, A10 1AB", "E", "NO", "HOUSE", 2025]) + houses.append([54321, "100 Random Road, London, A10 1AB", "F", "NO", "HOUSE", 2025]) + all_energy_breakdown = wb.create_sheet("All Energy Breakdown ") # Trailing space is intentional; matches source all_energy_breakdown.append([ "UPRN", @@ -116,6 +121,6 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): # assert # TODO: Improve these asserts assert len(result) == 2 - assert isinstance(result[0], LbwfAssetCondition) + assert isinstance(result[0], LbwfHouse) assert result[0].uprn == 1 assert result[1].uprn == 2 \ No newline at end of file From e11f59a7324e78fe16b72eca76ceedde1ac29e4c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 09:53:01 +0000 Subject: [PATCH 17/20] =?UTF-8?q?Parse=20Houses=20sheet=20as=20well=20?= =?UTF-8?q?=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 65 +++++++++++++++---- .../records/lbwf/lbwf_asset_condition.py | 1 - .../tests/parsing/test_lbwf_parser.py | 10 ++- 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 2cd34ae4..3c2fbf93 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,9 +1,11 @@ from typing import BinaryIO, Any, Dict, Iterator, List, Tuple from openpyxl import Workbook, load_workbook from datetime import date +from collections import defaultdict from backend.condition.parsing.parser import Parser from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse from backend.condition.utils.date_utils import normalise_date from utils.logger import setup_logger @@ -15,35 +17,71 @@ class LbwfParser(Parser): wb = load_workbook(file_stream) address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(wb) + # Parse assets assets_sheet: Workbook = wb["Houses Asset Data"] - rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True) - headers = next(rows) - header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers) + asset_rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True) + asset_headers = next(asset_rows) + asset_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(asset_headers) assets: List[LbwfAssetCondition] = [] - - for row in rows: + for row in asset_rows: try: - assets.append(LbwfParser._map_row_to_asset_record(row, header_indexes, address_to_uprn_map)) + assets.append(LbwfParser._map_row_to_asset_record(row, asset_header_indexes)) except Exception as e: logger.error(f"Error mapping LBWF row to asset record: {e}") - print(assets) - return assets + # Parse houses + houses_sheet: Workbook = wb["Houses"] + house_rows: Iterator[Tuple[object | None, ...]] = houses_sheet.iter_rows(values_only=True) + house_headers = next(house_rows) + house_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(house_headers) + houses: List[LbwfHouse] = [] + for row in house_rows: + try: + houses.append(LbwfParser._map_row_to_house_record(row, house_header_indexes, address_to_uprn_map)) + except Exception as e: + logger.error(f"Error mapping LBWF row to house record: {e}") + + # Merge assets and houses by Reference + assets_by_ref: Dict[int, List[LbwfAssetCondition]] = defaultdict(list) + for asset in assets: + assets_by_ref[asset.prop_ref].append(asset) + + for house in houses: + house.assets = assets_by_ref.get(house.reference, []) + + return houses + + + @staticmethod + def _map_row_to_house_record( + row: Any | Tuple[object | None, ...], + header_indexes: Dict[str, int], + address_to_uprn_map: Dict[str, int], + ) -> LbwfHouse: + address: str = row[header_indexes["Address"]] + + return LbwfHouse( + uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map), + reference=row[header_indexes["Reference"]], + address=address, + epc=row[header_indexes["EPC"]], + shdf=row[header_indexes["SHDF"]], + house=row[header_indexes["HOSUE"]], + fail_decency=row[header_indexes["Fail Decency"]], + assets=[], + ) + @staticmethod def _map_row_to_asset_record( row: Any | Tuple[object | None, ...], header_indexes: Dict[str, int], - address_to_uprn_map: Dict[str, int] ) -> LbwfAssetCondition: - address: str = row[header_indexes["ADDRESS"]] - return LbwfAssetCondition( - uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map), prop_ref=row[header_indexes["PROP REF"]], domna=row[header_indexes["Domna"]], - address=address, + address=row[header_indexes["ADDRESS"]], ownership=row[header_indexes["OWNERSHIP"]], prop_status=row[header_indexes["PROP STATUS"]], prop_type=row[header_indexes["PROP TYPE"]], @@ -62,6 +100,7 @@ class LbwfParser(Parser): element_comments=row[header_indexes["ELEMENT COMMENTS"]], ) + @staticmethod def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]: sheet: Workbook = wb["All Energy Breakdown "] diff --git a/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py index 3955350b..dffd1e53 100644 --- a/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py +++ b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py @@ -4,7 +4,6 @@ from datetime import date @dataclass class LbwfAssetCondition: - uprn: int prop_ref: int domna: int address: str diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index dfa1403e..78dbddad 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -5,6 +5,7 @@ from openpyxl import Workbook from datetime import datetime from backend.condition.parsing.lbwf_parser import LbwfParser +from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse @pytest.fixture @@ -121,6 +122,13 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): # assert # TODO: Improve these asserts assert len(result) == 2 + assert isinstance(result[0], LbwfHouse) assert result[0].uprn == 1 - assert result[1].uprn == 2 \ No newline at end of file + assert len(result[0].assets) == 1 + assert isinstance(result[0].assets[0], LbwfAssetCondition) + + assert isinstance(result[1], LbwfHouse) + assert result[1].uprn == 2 + assert len(result[1].assets) == 1 + assert isinstance(result[1].assets[0], LbwfAssetCondition) \ No newline at end of file From 01bf0dbd9c7aeb5f0b3ca3c4053d9bde0d9f6e4e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 09:58:20 +0000 Subject: [PATCH 18/20] =?UTF-8?q?Parse=20Houses=20sheet=20as=20well=20?= =?UTF-8?q?=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/condition/parsing/lbwf_parser.py | 58 ++++++++++++++++++------ 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 3c2fbf93..44ae05f7 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -14,36 +14,66 @@ logger = setup_logger class LbwfParser(Parser): def parse(self, file_stream: BinaryIO) -> Any: - wb = load_workbook(file_stream) - address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(wb) + wb: Workbook = load_workbook(file_stream) + address_to_uprn_map: Dict[str, int] = self._generate_address_to_uprn_dict(wb) + + assets = self._parse_assets(wb) + houses = self._parse_houses(wb, address_to_uprn_map) + + self._merge_assets_into_houses(assets, houses) + + return houses + + @staticmethod + def _parse_assets(wb: Workbook) -> List[LbwfAssetCondition]: + assets_sheet = wb["Houses Asset Data"] + asset_rows = assets_sheet.iter_rows(values_only=True) - # Parse assets - assets_sheet: Workbook = wb["Houses Asset Data"] - asset_rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True) asset_headers = next(asset_rows) - asset_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(asset_headers) + asset_header_indexes = LbwfParser._get_column_indexes_by_name(asset_headers) assets: List[LbwfAssetCondition] = [] for row in asset_rows: try: - assets.append(LbwfParser._map_row_to_asset_record(row, asset_header_indexes)) + assets.append( + LbwfParser._map_row_to_asset_record(row, asset_header_indexes) + ) except Exception as e: logger.error(f"Error mapping LBWF row to asset record: {e}") - # Parse houses - houses_sheet: Workbook = wb["Houses"] - house_rows: Iterator[Tuple[object | None, ...]] = houses_sheet.iter_rows(values_only=True) + return assets + + @staticmethod + def _parse_houses( + wb: Workbook, + address_to_uprn_map: Dict[str, int], + ) -> List[LbwfHouse]: + houses_sheet = wb["Houses"] + house_rows = houses_sheet.iter_rows(values_only=True) + house_headers = next(house_rows) - house_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(house_headers) + house_header_indexes = LbwfParser._get_column_indexes_by_name(house_headers) houses: List[LbwfHouse] = [] for row in house_rows: try: - houses.append(LbwfParser._map_row_to_house_record(row, house_header_indexes, address_to_uprn_map)) + houses.append( + LbwfParser._map_row_to_house_record( + row, + house_header_indexes, + address_to_uprn_map, + ) + ) except Exception as e: logger.error(f"Error mapping LBWF row to house record: {e}") - # Merge assets and houses by Reference + return houses + + @staticmethod + def _merge_assets_into_houses( + assets: List[LbwfAssetCondition], + houses: List[LbwfHouse], + ) -> None: assets_by_ref: Dict[int, List[LbwfAssetCondition]] = defaultdict(list) for asset in assets: assets_by_ref[asset.prop_ref].append(asset) @@ -51,8 +81,6 @@ class LbwfParser(Parser): for house in houses: house.assets = assets_by_ref.get(house.reference, []) - return houses - @staticmethod def _map_row_to_house_record( From 7846f9c949a64f133b7f6afa971ba65f37fba2f9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 10:34:09 +0000 Subject: [PATCH 19/20] Adjust EPC column name to correctly include trailing space --- backend/condition/parsing/lbwf_parser.py | 5 +++-- backend/condition/processor.py | 4 +++- backend/condition/tests/parsing/test_lbwf_parser.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 44ae05f7..8d52f6d5 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -1,6 +1,5 @@ from typing import BinaryIO, Any, Dict, Iterator, List, Tuple from openpyxl import Workbook, load_workbook -from datetime import date from collections import defaultdict from backend.condition.parsing.parser import Parser @@ -40,6 +39,7 @@ class LbwfParser(Parser): ) except Exception as e: logger.error(f"Error mapping LBWF row to asset record: {e}") + continue return assets @@ -66,6 +66,7 @@ class LbwfParser(Parser): ) except Exception as e: logger.error(f"Error mapping LBWF row to house record: {e}") + continue return houses @@ -94,7 +95,7 @@ class LbwfParser(Parser): uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map), reference=row[header_indexes["Reference"]], address=address, - epc=row[header_indexes["EPC"]], + epc=row[header_indexes["EPC "]], shdf=row[header_indexes["SHDF"]], house=row[header_indexes["HOSUE"]], fail_decency=row[header_indexes["Fail Decency"]], diff --git a/backend/condition/processor.py b/backend/condition/processor.py index 3939ba08..fb06c888 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -13,4 +13,6 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None: parser: Parser = select_parser(file_type) # Orchestration - records: List[Any] = parser.parse(file_stream) \ No newline at end of file + records: List[Any] = parser.parse(file_stream) + + print(records) # temp \ No newline at end of file diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py index 78dbddad..7556b845 100644 --- a/backend/condition/tests/parsing/test_lbwf_parser.py +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -79,7 +79,7 @@ def lbwf_homes_xlsx_bytes() -> BytesIO: ]) houses = wb.create_sheet("Houses") - houses.append(["Reference", "Address", "EPC", "SHDF", "HOSUE", "Fail Decency"]) + houses.append(["Reference", "Address", "EPC ", "SHDF", "HOSUE", "Fail Decency"]) houses.append([12345, "123 Fake Street, London, A10 1AB", "E", "NO", "HOUSE", 2025]) houses.append([54321, "100 Random Road, London, A10 1AB", "F", "NO", "HOUSE", 2025]) From 07cab931e509ec5b3f6d0f3a2b6f44d45f336472 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 20 Jan 2026 11:02:59 +0000 Subject: [PATCH 20/20] correction to gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 625277a5..6268360b 100644 --- a/.gitignore +++ b/.gitignore @@ -243,7 +243,7 @@ local_data/* /local_data/* etl/epc/local_data/* /backend/condition/sample_data/lbwf/* -/backend/condition/sample_data/peadody/* +/backend/condition/sample_data/peabody/* *.DS_Store infrastructure/terraform/.terraform*