From 6ff652ff3492222633e118d27619121aa2a65800 Mon Sep 17 00:00:00 2001
From: Daniel Roth <daniel_roth@hotmail.co.uk>
Date: Mon, 19 Jan 2026 16:23:11 +0000
Subject: [PATCH] =?UTF-8?q?parse=20lbwf=20houses=20=F0=9F=9F=A9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .devcontainer/devcontainer.json               |  3 +
 .vscode/launch.json                           | 15 +++
 .vscode/settings.json                         |  1 +
 backend/condition/parsing/lbwf_parser.py      | 96 ++++++++++++++++++-
 ...y_condition.py => lbwf_asset_condition.py} |  2 +-
 backend/condition/processor.py                |  3 +-
 .../tests/parsing/test_lbwf_parser.py         |  8 +-
 backend/condition/utils/date_utils.py         | 10 ++
 8 files changed, 128 insertions(+), 10 deletions(-)
 create mode 100644 .vscode/launch.json
 rename backend/condition/parsing/records/{lbwf_property_condition.py => lbwf_asset_condition.py} (95%)
 create mode 100644 backend/condition/utils/date_utils.py

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 91a76c3d..761786cd 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -27,5 +27,8 @@
         "ms-python.vscode-python-envs"
       ]
     }
+  },
+  "containerEnv": {
+    "PYTHONFLAGS": "-Xfrozen_modules=off"
   }
 }
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 00000000..6b76b4fa
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python Debugger: Current File",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 9a9ea9f8..88c2ae2d 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -11,6 +11,7 @@
     },
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
+    "python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
 
     // Hot reload setting that needs to be in user settings
     // "jupyter.runStartupCommands": [
diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py
index 7404189f..fd65e24a 100644
--- a/backend/condition/parsing/lbwf_parser.py
+++ b/backend/condition/parsing/lbwf_parser.py
@@ -1,9 +1,99 @@
-from typing import BinaryIO, Any
+from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
+from openpyxl import Workbook, load_workbook
+from datetime import date
 
 from backend.condition.parsing.parser import Parser
-from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition
+from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition
+from backend.condition.utils.date_utils import normalise_date
+from utils.logger import setup_logger
+
+logger = setup_logger()  # must be called: the bare function has no .error(); TODO confirm args
 
 class LbwfParser(Parser):
 
     def parse(self, file_stream: BinaryIO) -> Any:
-        raise NotImplementedError
\ No newline at end of file
+        """Parse the LBWF workbook into one LbwfAssetCondition per asset row.
+
+        Rows on "Houses Asset Data" whose INSTALL DATE cannot be normalised are
+        logged and skipped. Raises KeyError if an expected column is missing.
+        """
+        wb = load_workbook(file_stream)
+        urn_to_address_map: Dict[str, int | None] = LbwfParser._map_uprn_to_address(wb)
+
+        assets_sheet = wb["Houses Asset Data"]
+        rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True)
+        header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(next(rows))
+
+        assets: List[LbwfAssetCondition] = []
+        for row in rows:
+            try:
+                install_date: date | None = normalise_date(row[header_indexes["INSTALL DATE"]])
+            except ValueError as e:
+                logger.error(f"Failed to process LBWF asset due to badly formatted date: {e}")
+                continue
+            assets.append(LbwfAssetCondition(
+                uprn=0,  # placeholder: urn_to_address_map is not joined to assets yet
+                prop_ref=row[header_indexes["PROP REF"]],
+                domna=row[header_indexes["Domna"]],
+                address=row[header_indexes["ADDRESS"]],
+                ownership=row[header_indexes["OWNERSHIP"]],
+                prop_status=row[header_indexes["PROP STATUS"]],
+                prop_type=row[header_indexes["PROP TYPE"]],
+                prop_sub_type=row[header_indexes["PROP SUB TYPE"]],
+                element_group=row[header_indexes["ELEMENT GROUP"]],
+                element_code=row[header_indexes["ELEMENT CODE"]],
+                element_code_description=row[header_indexes["ELEMENT CODE DESCRIPTION"]],
+                attribute_code=row[header_indexes["ATTRIBUTE CODE"]],
+                attribute_code_description=row[header_indexes["ATTRIBUTE CODE DESCRIPTION"]],
+                element_date_value=row[header_indexes["ELEMENT DATE VALUE"]],
+                element_numerical_value=row[header_indexes["ELEMENT NUMERIC VALUE"]],
+                element_text_value=row[header_indexes["ELEMENT TEXT VALUE"]],
+                quantity=row[header_indexes["QUANTITY"]],
+                install_date=install_date,
+                remaining_life=row[header_indexes["REMAINING LIFE"]],
+                element_comments=row[header_indexes["ELEMENT COMMENTS"]],
+            ))
+        return assets
+
+    
+    @staticmethod
+    def _map_uprn_to_address(wb: Workbook) -> Dict[str, int | None]:
+        """Map each address on the "All Energy Breakdown " sheet to its UPRN.
+
+        Rows whose address is not a string are skipped, and a repeated address
+        keeps the UPRN of its last row. Raises ValueError for a UPRN that is
+        neither None nor an int, and KeyError if a required column is missing.
+        """
+        # The trailing space in the sheet name is intentional; it matches the source workbook.
+        sheet = wb["All Energy Breakdown "]
+        rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)
+        header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(next(rows))
+        address_idx = header_indexes["Address"]
+        uprn_idx = header_indexes["UPRN"]
+
+        mapping: Dict[str, int | None] = {}
+        for row in rows:
+            address = row[address_idx]
+            uprn = row[uprn_idx]
+            if not isinstance(address, str):
+                continue
+
+            if uprn is not None and not isinstance(uprn, int):
+                raise ValueError(f"Unexpected UPRN value: {uprn!r}")
+
+            mapping[address] = uprn
+
+        return mapping
+
+
+    @staticmethod
+    def _get_column_indexes_by_name(
+        headers: Tuple[object | None, ...]
+    ) -> Dict[str, int]:
+        """Return a {header text: column index} lookup for a header row.
+
+        Non-string cells (such as empty ones) are skipped; when a header text
+        repeats, the right-most column wins.
+        """
+        return {header: i for i, header in enumerate(headers) if isinstance(header, str)}
+        
diff --git a/backend/condition/parsing/records/lbwf_property_condition.py b/backend/condition/parsing/records/lbwf_asset_condition.py
similarity index 95%
rename from backend/condition/parsing/records/lbwf_property_condition.py
rename to backend/condition/parsing/records/lbwf_asset_condition.py
index 1ecd00d6..3955350b 100644
--- a/backend/condition/parsing/records/lbwf_property_condition.py
+++ b/backend/condition/parsing/records/lbwf_asset_condition.py
@@ -3,7 +3,7 @@ from datetime import date
 
 
 @dataclass
-class LbwfPropertyCondition:
+class LbwfAssetCondition:
     uprn: int
     prop_ref: int
     domna: int
diff --git a/backend/condition/processor.py b/backend/condition/processor.py
index f19c4257..3939ba08 100644
--- a/backend/condition/processor.py
+++ b/backend/condition/processor.py
@@ -1,4 +1,4 @@
-from typing import BinaryIO, List
+from typing import Any, BinaryIO, List
 
 from backend.condition.parsing.parser import Parser
 from utils.logger import setup_logger
@@ -13,3 +13,4 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None:
     parser: Parser = select_parser(file_type)
 
     # Orchestration
+    records: List[Any] = parser.parse(file_stream)
\ No newline at end of file
diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py
index d9ed1e90..6a93979a 100644
--- a/backend/condition/tests/parsing/test_lbwf_parser.py
+++ b/backend/condition/tests/parsing/test_lbwf_parser.py
@@ -3,10 +3,9 @@ import pytest
 from io import BytesIO
 from openpyxl import Workbook
 from datetime import datetime
-import debugpy
 
 from backend.condition.parsing.lbwf_parser import LbwfParser
-from backend.condition.parsing.records.lbwf_property_condition import LbwfPropertyCondition
+from backend.condition.parsing.records.lbwf_asset_condition import LbwfAssetCondition
 
 @pytest.fixture
 def lbwf_homes_xlsx_bytes() -> BytesIO:
@@ -78,7 +77,7 @@ def lbwf_homes_xlsx_bytes() -> BytesIO:
         "Source of Data = Joe Bloggs",
     ])
     
-    all_energy_breakdown = wb.create_sheet("All Energy Breakdown")
+    all_energy_breakdown = wb.create_sheet("All Energy Breakdown ") # Trailing space is intentional; matches source
     all_energy_breakdown.append([
         "UPRN",
         "Organisation Reference",
@@ -108,7 +107,6 @@ def lbwf_homes_xlsx_bytes() -> BytesIO:
     return stream
 
 def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes):
-    debugpy.wait_for_client()
     # arrange
     parser = LbwfParser()
 
@@ -117,4 +115,4 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes):
 
     # assert
     assert len(result) == 2
-    assert isinstance(result[0], LbwfPropertyCondition) # TODO: Improve these asserts
+    assert isinstance(result[0], LbwfAssetCondition) # TODO: Improve these asserts
diff --git a/backend/condition/utils/date_utils.py b/backend/condition/utils/date_utils.py
new file mode 100644
index 00000000..4535acd9
--- /dev/null
+++ b/backend/condition/utils/date_utils.py
@@ -0,0 +1,23 @@
+from datetime import datetime, date
+from typing import Any
+
+
+def normalise_date(value: Any, allow_none: bool = True) -> date | None:
+    """Coerce a raw value to a ``datetime.date``.
+
+    ``datetime`` values are truncated to their date part, plain ``date``
+    values are returned unchanged, and ``None`` is passed through when
+    ``allow_none`` is true.
+
+    Raises:
+        ValueError: for any other value, including ``None`` when
+            ``allow_none`` is false.
+    """
+    if value is None and allow_none:
+        return None
+    if isinstance(value, datetime):
+        return value.date()
+    if isinstance(value, date):
+        # A plain date is already normalised (datetime is handled above).
+        return value
+    raise ValueError(f"Unexpected date value: {value!r}")
\ No newline at end of file