diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py
index 2cd34ae4..3c2fbf93 100644
--- a/backend/condition/parsing/lbwf_parser.py
+++ b/backend/condition/parsing/lbwf_parser.py
@@ -1,9 +1,11 @@
 from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
 from openpyxl import Workbook, load_workbook
 from datetime import date
+from collections import defaultdict
 
 from backend.condition.parsing.parser import Parser
 from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition
+from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse
 from backend.condition.utils.date_utils import normalise_date
 from utils.logger import setup_logger
 
@@ -15,35 +17,71 @@ class LbwfParser(Parser):
         wb = load_workbook(file_stream)
         address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(wb)
 
+        # Parse assets
         assets_sheet: Workbook = wb["Houses Asset Data"]
-        rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True)
-        headers = next(rows)
-        header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers)
+        asset_rows: Iterator[Tuple[object | None, ...]] = assets_sheet.iter_rows(values_only=True)
+        asset_headers = next(asset_rows)
+        asset_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(asset_headers)
 
         assets: List[LbwfAssetCondition] = []
-
-        for row in rows:
+        for row in asset_rows:
             try:
-                assets.append(LbwfParser._map_row_to_asset_record(row, header_indexes, address_to_uprn_map))
+                assets.append(LbwfParser._map_row_to_asset_record(row, asset_header_indexes))
             except Exception as e:
                 logger.error(f"Error mapping LBWF row to asset record: {e}")
 
-        print(assets)
-        return assets
+        # Parse houses
+        houses_sheet: Workbook = wb["Houses"]
+        house_rows: Iterator[Tuple[object | None, ...]] = houses_sheet.iter_rows(values_only=True)
+        house_headers = next(house_rows)
+        house_header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(house_headers)
+        houses: List[LbwfHouse] = []
+        for row in house_rows:
+            try:
+                houses.append(LbwfParser._map_row_to_house_record(row, house_header_indexes, address_to_uprn_map))
+            except Exception as e:
+                logger.error(f"Error mapping LBWF row to house record: {e}")
+
+        # Merge assets and houses by Reference
+        assets_by_ref: Dict[int, List[LbwfAssetCondition]] = defaultdict(list)
+        for asset in assets:
+            assets_by_ref[asset.prop_ref].append(asset)
+
+        for house in houses:
+            house.assets = assets_by_ref.get(house.reference, [])
+
+        return houses
+
+
+    @staticmethod
+    def _map_row_to_house_record(
+        row: Any | Tuple[object | None, ...],
+        header_indexes: Dict[str, int],
+        address_to_uprn_map: Dict[str, int],
+    ) -> LbwfHouse:
+        address: str = row[header_indexes["Address"]]
+
+        return LbwfHouse(
+            uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map),
+            reference=row[header_indexes["Reference"]],
+            address=address,
+            epc=row[header_indexes["EPC"]],
+            shdf=row[header_indexes["SHDF"]],
+            house=row[header_indexes["HOSUE"]],
+            fail_decency=row[header_indexes["Fail Decency"]],
+            assets=[],
+        )
+
 
     @staticmethod
     def _map_row_to_asset_record(
         row: Any | Tuple[object | None, ...],
         header_indexes: Dict[str, int],
-        address_to_uprn_map: Dict[str, int]
     ) -> LbwfAssetCondition:
-        address: str = row[header_indexes["ADDRESS"]]
-
         return LbwfAssetCondition(
-            uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map),
             prop_ref=row[header_indexes["PROP REF"]],
             domna=row[header_indexes["Domna"]],
-            address=address,
+            address=row[header_indexes["ADDRESS"]],
             ownership=row[header_indexes["OWNERSHIP"]],
             prop_status=row[header_indexes["PROP STATUS"]],
             prop_type=row[header_indexes["PROP TYPE"]],
@@ -62,6 +100,7 @@ class LbwfParser(Parser):
             element_comments=row[header_indexes["ELEMENT COMMENTS"]],
         )
 
+
     @staticmethod
     def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]:
         sheet: Workbook = wb["All Energy Breakdown "]
diff --git a/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py
index 3955350b..dffd1e53 100644
--- a/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py
+++ b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py
@@ -4,7 +4,6 @@ from datetime import date
 
 @dataclass
 class LbwfAssetCondition:
-    uprn: int
     prop_ref: int
     domna: int
     address: str
diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py
index dfa1403e..78dbddad 100644
--- a/backend/condition/tests/parsing/test_lbwf_parser.py
+++ b/backend/condition/tests/parsing/test_lbwf_parser.py
@@ -5,6 +5,7 @@ from openpyxl import Workbook
 from datetime import datetime
 
 from backend.condition.parsing.lbwf_parser import LbwfParser
+from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition
 from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse
 
 @pytest.fixture
@@ -121,6 +122,13 @@ def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes):
     # assert
     # TODO: Improve these asserts
     assert len(result) == 2
+    assert isinstance(result[0], LbwfHouse)
 
     assert result[0].uprn == 1
-    assert result[1].uprn == 2
\ No newline at end of file
+    assert len(result[0].assets) == 1
+    assert isinstance(result[0].assets[0], LbwfAssetCondition)
+
+    assert isinstance(result[1], LbwfHouse)
+    assert result[1].uprn == 2
+    assert len(result[1].assets) == 1
+    assert isinstance(result[1].assets[0], LbwfAssetCondition)
\ No newline at end of file