Optionally inject a UPRN lookup map into the parser `parse` method, for testing purposes 🟩

This commit is contained in:
Daniel Roth 2026-01-30 09:32:34 +00:00
parent d26ea2863b
commit f253bbbf8b
4 changed files with 167 additions and 155 deletions

View file

@ -1,4 +1,4 @@
from typing import BinaryIO, Any, Dict, Iterator, List, Tuple from typing import BinaryIO, Any, Dict, Iterator, List, Optional, Tuple
from openpyxl import Workbook, load_workbook from openpyxl import Workbook, load_workbook
from collections import defaultdict from collections import defaultdict
@ -15,7 +15,11 @@ logger = setup_logger()
class LbwfParser(Parser): class LbwfParser(Parser):
def parse(self, file_stream: BinaryIO) -> Any: def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
wb: Workbook = load_workbook(file_stream) wb: Workbook = load_workbook(file_stream)
address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict( address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(
wb wb

View file

@ -1,9 +1,13 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import BinaryIO, Any from typing import BinaryIO, Any, Dict, Optional
class Parser(ABC): class Parser(ABC):
@abstractmethod @abstractmethod
def parse(self, file_stream: BinaryIO) -> Any: def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
pass pass

View file

@ -1,6 +1,6 @@
import csv import csv
from pathlib import Path from pathlib import Path
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
from openpyxl import Workbook, load_workbook from openpyxl import Workbook, load_workbook
from collections import defaultdict from collections import defaultdict
@ -15,11 +15,17 @@ logger = setup_logger()
class PeabodyParser(Parser): class PeabodyParser(Parser):
def parse(self, file_stream: BinaryIO) -> Any: def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
wb: Workbook = load_workbook(file_stream) wb: Workbook = load_workbook(file_stream)
location_ref_to_uprn_map: Dict[str, int] = (
PeabodyParser._build_location_ref_to_uprn_map() if location_ref_to_uprn_map is None:
) location_ref_to_uprn_map: Dict[str, int] = (
PeabodyParser._build_location_ref_to_uprn_map()
)
assets = PeabodyParser._parse_assets(wb) assets = PeabodyParser._parse_assets(wb)
@ -128,37 +134,6 @@ class PeabodyParser(Parser):
condition_survey_date=row[header_indexes["condition_survey_date"]], condition_survey_date=row[header_indexes["condition_survey_date"]],
) )
@staticmethod
def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]:
sheet = wb["Survey Records - D & Lower"]
rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)
headers = next(rows)
header_indexes: Dict[str, int] = PeabodyParser._get_column_indexes_by_name(
headers
)
address_idx = header_indexes["full_address"]
address_to_uprn: Dict[str, int] = {}
# Generate random UPRNs for now
next_uprn = 1 # TODO: get real UPRNs
for row in rows:
address = row[address_idx]
if address is None:
continue
address = address.strip()
if address not in address_to_uprn:
address_to_uprn[address] = next_uprn
next_uprn += 1
return address_to_uprn
@staticmethod @staticmethod
def _get_column_indexes_by_name( def _get_column_indexes_by_name(
headers: Tuple[object | None, ...], headers: Tuple[object | None, ...],

View file

@ -1,127 +1,141 @@
import pytest import pytest
from typing import Any from typing import Any, Dict
from io import BytesIO from io import BytesIO
from openpyxl import Workbook from openpyxl import Workbook
from datetime import datetime from datetime import datetime
from backend.condition.parsing.peabody_parser import PeabodyParser from backend.condition.parsing.peabody_parser import PeabodyParser
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition,
)
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
@pytest.fixture @pytest.fixture
def peabody_assets_xlsx_bytes() -> BytesIO: def peabody_assets_xlsx_bytes() -> BytesIO:
wb = Workbook() wb = Workbook()
survey_records_d_and_lower = wb.active survey_records_d_and_lower = wb.active
survey_records_d_and_lower.title = "Survey Records - D & Lower" survey_records_d_and_lower.title = "Survey Records - D & Lower"
survey_records_d_and_lower.append([ survey_records_d_and_lower.append(
"Lo_Reference", [
"full_address", "Lo_Reference",
"location_type_code", "full_address",
"Parent_Lo_Reference", "location_type_code",
"Element_Code", "Parent_Lo_Reference",
"Element", "Element_Code",
"Sub_Element_Code", "Element",
"Sub_Element", "Sub_Element_Code",
"Material_Code", "Sub_Element",
"material_or_answer", "Material_Code",
"Renewal_Quantity", "material_or_answer",
"Renewal_Year", "Renewal_Quantity",
"Renewal_Cost", "Renewal_Year",
"cloned", "Renewal_Cost",
"lo_type_code", "cloned",
"condition_survey_date", "lo_type_code",
]) "condition_survey_date",
survey_records_d_and_lower.append([ ]
"B000RAND", )
"1 RANDOM HOUSE LONDON", survey_records_d_and_lower.append(
3, [
"RAND2EST", "B000RAND",
110, "1 RANDOM HOUSE LONDON",
"ROOFS", 3,
1, "RAND2EST",
"Primary Roof", 110,
9, "ROOFS",
"Other", 1,
3, "Primary Roof",
2054, 9,
330, "Other",
"N", 3,
3, 2054,
datetime(2025,12,4,9,17,0) 330,
]) "N",
survey_records_d_and_lower.append([ 3,
"B000BLOCK", datetime(2025, 12, 4, 9, 17, 0),
"1100 BLOCK", ]
3, )
"RAND2EST", survey_records_d_and_lower.append(
110, [
"ROOFS", "B000BLOCK",
1, "1100 BLOCK",
"Primary Roof", 3,
9, "RAND2EST",
"Other", 110,
3, "ROOFS",
2054, 1,
330, "Primary Roof",
"N", 9,
3, "Other",
datetime(2025,12,4,9,17,0) 3,
]) 2054,
survey_records_d_and_lower.append([ 330,
"B000FAKE", "N",
"3 FAKE CLOSE LONDON", 3,
3, datetime(2025, 12, 4, 9, 17, 0),
"FAKEEST", ]
100, )
"GENERAL", survey_records_d_and_lower.append(
15, [
"External Decoration", "B000FAKE",
2, "3 FAKE CLOSE LONDON",
"Normal", 3,
1, "FAKEEST",
2035, 100,
1500.7, "GENERAL",
"N", 15,
3, "External Decoration",
datetime(2025,7,5,0,0,0) 2,
]) "Normal",
survey_records_d_and_lower.append([ 1,
"B000MIS", 2035,
"99 MISC ROAD LONDON", 1500.7,
3, "N",
"300828", 3,
54, datetime(2025, 7, 5, 0, 0, 0),
"HHSRS", ]
29, )
"HHSRS Structural Collapse & Falling Elements", survey_records_d_and_lower.append(
4, [
"HHSRS Moderate", "B000MIS",
2, "99 MISC ROAD LONDON",
2027, 3,
None, "300828",
"N", 54,
3, "HHSRS",
None 29,
]) "HHSRS Structural Collapse & Falling Elements",
survey_records_d_and_lower.append([ 4,
"B000MIS", "HHSRS Moderate",
"99 MISC ROAD LONDON", 2,
3, 2027,
"300828", None,
53, "N",
"External", 3,
2, None,
"Chimney", ]
2, )
"Present", survey_records_d_and_lower.append(
33, [
2053, "B000MIS",
3531, "99 MISC ROAD LONDON",
"N", 3,
3, "300828",
None 53,
]) "External",
2,
"Chimney",
2,
"Present",
33,
2053,
3531,
"N",
3,
None,
]
)
stream = BytesIO() stream = BytesIO()
wb.save(stream) wb.save(stream)
@ -129,18 +143,32 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
return stream return stream
def test_peabody_parser_parses_conditions(peabody_assets_xlsx_bytes):
@pytest.fixture
def location_ref_to_uprn_map() -> Dict[str, int]:
return {
"B000RAND": 1,
"B000BLOCK": 2,
"B000FAKE": 3,
"B000MIS": 4,
}
def test_peabody_parser_parses_conditions(
peabody_assets_xlsx_bytes, location_ref_to_uprn_map
):
# arrange # arrange
parser = PeabodyParser() parser = PeabodyParser()
# act # act
result: Any = parser.parse(peabody_assets_xlsx_bytes) result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map)
# assert # assert
assert len(result) == 3 assert len(result) == 3
assert all(isinstance(item, PeabodyProperty) for item in result) assert all(isinstance(item, PeabodyProperty) for item in result)
@pytest.fixture @pytest.fixture
def asset_condition_factory(): def asset_condition_factory():
def _factory(full_address: str) -> PeabodyAssetCondition: def _factory(full_address: str) -> PeabodyAssetCondition:
@ -165,6 +193,7 @@ def asset_condition_factory():
return _factory return _factory
@pytest.mark.parametrize( @pytest.mark.parametrize(
"full_address, expected_block_level", "full_address, expected_block_level",
[ [
@ -175,7 +204,7 @@ def asset_condition_factory():
("81A-B GORE ROAD LONDON", True), ("81A-B GORE ROAD LONDON", True),
("73 & 74 HARVEST COURT ST. ALBANS", True), ("73 & 74 HARVEST COURT ST. ALBANS", True),
("25 HAVERSHAM COURT GREENFORD", False), ("25 HAVERSHAM COURT GREENFORD", False),
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False) ("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False),
], ],
) )
def test_peabody_asset_is_block_level( def test_peabody_asset_is_block_level(
@ -187,4 +216,4 @@ def test_peabody_asset_is_block_level(
asset_condition = asset_condition_factory(full_address) asset_condition = asset_condition_factory(full_address)
# act + assert # act + assert
assert asset_condition.is_block_level == expected_block_level assert asset_condition.is_block_level == expected_block_level