Optionally inject the UPRN lookup into the parsers' `parse` method, for testing purposes 🟩

This commit is contained in:
Daniel Roth 2026-01-30 09:32:34 +00:00
parent d26ea2863b
commit f253bbbf8b
4 changed files with 167 additions and 155 deletions

View file

@ -1,4 +1,4 @@
from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
from typing import BinaryIO, Any, Dict, Iterator, List, Optional, Tuple
from openpyxl import Workbook, load_workbook
from collections import defaultdict
@ -15,7 +15,11 @@ logger = setup_logger()
class LbwfParser(Parser):
def parse(self, file_stream: BinaryIO) -> Any:
def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
wb: Workbook = load_workbook(file_stream)
address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(
wb

View file

@ -1,9 +1,13 @@
from abc import ABC, abstractmethod
from typing import BinaryIO, Any
from typing import BinaryIO, Any, Dict, Optional
class Parser(ABC):
    """Abstract interface implemented by every concrete file parser."""

    @abstractmethod
    def parse(
        self,
        file_stream: BinaryIO,
        location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
    ) -> Any:
        """Parse ``file_stream`` and return the parser-specific result.

        Args:
            file_stream: Binary stream holding the file to parse.
            location_ref_to_uprn_map: Optional lookup from location reference
                to UPRN, so callers (for example tests) can inject it. When
                ``None``, the implementation is responsible for obtaining
                whatever lookup it needs.

        Returns:
            Whatever the concrete parser produces; the base class does not
            constrain the type.
        """

View file

@ -1,6 +1,6 @@
import csv
from pathlib import Path
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
from openpyxl import Workbook, load_workbook
from collections import defaultdict
@ -15,11 +15,17 @@ logger = setup_logger()
class PeabodyParser(Parser):
def parse(self, file_stream: BinaryIO) -> Any:
def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
wb: Workbook = load_workbook(file_stream)
location_ref_to_uprn_map: Dict[str, int] = (
PeabodyParser._build_location_ref_to_uprn_map()
)
if location_ref_to_uprn_map is None:
location_ref_to_uprn_map: Dict[str, int] = (
PeabodyParser._build_location_ref_to_uprn_map()
)
assets = PeabodyParser._parse_assets(wb)
@ -128,37 +134,6 @@ class PeabodyParser(Parser):
condition_survey_date=row[header_indexes["condition_survey_date"]],
)
@staticmethod
def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int]:
    """Assign a placeholder UPRN to every distinct address in the survey sheet.

    Reads the "Survey Records - D & Lower" sheet of ``wb``, treats its first
    row as the header, and walks the ``full_address`` column. Each distinct
    (whitespace-stripped) address receives the next integer, starting at 1,
    in order of first appearance. Rows whose address cell is empty are
    skipped.

    Args:
        wb: Workbook containing the "Survey Records - D & Lower" sheet.

    Returns:
        Mapping of stripped address to its placeholder UPRN.
    """
    sheet = wb["Survey Records - D & Lower"]
    rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)
    header_indexes: Dict[str, int] = PeabodyParser._get_column_indexes_by_name(
        next(rows)
    )
    address_idx = header_indexes["full_address"]

    # TODO: get real UPRNs. Until then the values are sequential placeholders
    # (1, 2, 3, ...), not real identifiers.
    address_to_uprn: Dict[str, int] = {}
    for row in rows:
        raw_address = row[address_idx]
        if raw_address is None:
            continue
        # Cells are typed as plain objects; coerce before stripping so a
        # non-string cell (e.g. a purely numeric value) does not raise.
        address = str(raw_address).strip()
        # setdefault keeps the number given on first sight; len() + 1 is the
        # next unused number because numbering starts at 1 with no gaps.
        address_to_uprn.setdefault(address, len(address_to_uprn) + 1)
    return address_to_uprn
@staticmethod
def _get_column_indexes_by_name(
headers: Tuple[object | None, ...],

View file

@ -1,127 +1,141 @@
import pytest
from typing import Any
from typing import Any, Dict
from io import BytesIO
from openpyxl import Workbook
from datetime import datetime
from backend.condition.parsing.peabody_parser import PeabodyParser
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition,
)
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
@pytest.fixture
def peabody_assets_xlsx_bytes() -> BytesIO:
wb = Workbook()
survey_records_d_and_lower = wb.active
survey_records_d_and_lower.title = "Survey Records - D & Lower"
survey_records_d_and_lower.append([
"Lo_Reference",
"full_address",
"location_type_code",
"Parent_Lo_Reference",
"Element_Code",
"Element",
"Sub_Element_Code",
"Sub_Element",
"Material_Code",
"material_or_answer",
"Renewal_Quantity",
"Renewal_Year",
"Renewal_Cost",
"cloned",
"lo_type_code",
"condition_survey_date",
])
survey_records_d_and_lower.append([
"B000RAND",
"1 RANDOM HOUSE LONDON",
3,
"RAND2EST",
110,
"ROOFS",
1,
"Primary Roof",
9,
"Other",
3,
2054,
330,
"N",
3,
datetime(2025,12,4,9,17,0)
])
survey_records_d_and_lower.append([
"B000BLOCK",
"1100 BLOCK",
3,
"RAND2EST",
110,
"ROOFS",
1,
"Primary Roof",
9,
"Other",
3,
2054,
330,
"N",
3,
datetime(2025,12,4,9,17,0)
])
survey_records_d_and_lower.append([
"B000FAKE",
"3 FAKE CLOSE LONDON",
3,
"FAKEEST",
100,
"GENERAL",
15,
"External Decoration",
2,
"Normal",
1,
2035,
1500.7,
"N",
3,
datetime(2025,7,5,0,0,0)
])
survey_records_d_and_lower.append([
"B000MIS",
"99 MISC ROAD LONDON",
3,
"300828",
54,
"HHSRS",
29,
"HHSRS Structural Collapse & Falling Elements",
4,
"HHSRS Moderate",
2,
2027,
None,
"N",
3,
None
])
survey_records_d_and_lower.append([
"B000MIS",
"99 MISC ROAD LONDON",
3,
"300828",
53,
"External",
2,
"Chimney",
2,
"Present",
33,
2053,
3531,
"N",
3,
None
])
survey_records_d_and_lower.append(
[
"Lo_Reference",
"full_address",
"location_type_code",
"Parent_Lo_Reference",
"Element_Code",
"Element",
"Sub_Element_Code",
"Sub_Element",
"Material_Code",
"material_or_answer",
"Renewal_Quantity",
"Renewal_Year",
"Renewal_Cost",
"cloned",
"lo_type_code",
"condition_survey_date",
]
)
survey_records_d_and_lower.append(
[
"B000RAND",
"1 RANDOM HOUSE LONDON",
3,
"RAND2EST",
110,
"ROOFS",
1,
"Primary Roof",
9,
"Other",
3,
2054,
330,
"N",
3,
datetime(2025, 12, 4, 9, 17, 0),
]
)
survey_records_d_and_lower.append(
[
"B000BLOCK",
"1100 BLOCK",
3,
"RAND2EST",
110,
"ROOFS",
1,
"Primary Roof",
9,
"Other",
3,
2054,
330,
"N",
3,
datetime(2025, 12, 4, 9, 17, 0),
]
)
survey_records_d_and_lower.append(
[
"B000FAKE",
"3 FAKE CLOSE LONDON",
3,
"FAKEEST",
100,
"GENERAL",
15,
"External Decoration",
2,
"Normal",
1,
2035,
1500.7,
"N",
3,
datetime(2025, 7, 5, 0, 0, 0),
]
)
survey_records_d_and_lower.append(
[
"B000MIS",
"99 MISC ROAD LONDON",
3,
"300828",
54,
"HHSRS",
29,
"HHSRS Structural Collapse & Falling Elements",
4,
"HHSRS Moderate",
2,
2027,
None,
"N",
3,
None,
]
)
survey_records_d_and_lower.append(
[
"B000MIS",
"99 MISC ROAD LONDON",
3,
"300828",
53,
"External",
2,
"Chimney",
2,
"Present",
33,
2053,
3531,
"N",
3,
None,
]
)
stream = BytesIO()
wb.save(stream)
@ -129,18 +143,32 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
return stream
def test_peabody_parser_parses_conditions(peabody_assets_xlsx_bytes):
@pytest.fixture
def location_ref_to_uprn_map() -> Dict[str, int]:
    """Stand-in UPRN lookup keyed by the Lo_Reference values in the workbook fixture."""
    location_refs = ("B000RAND", "B000BLOCK", "B000FAKE", "B000MIS")
    # Number the references 1..4 in the order listed above.
    return {ref: uprn for uprn, ref in enumerate(location_refs, start=1)}
def test_peabody_parser_parses_conditions(
    peabody_assets_xlsx_bytes, location_ref_to_uprn_map
):
    """Parsing the fixture workbook with an injected UPRN lookup yields properties."""
    # arrange
    parser = PeabodyParser()

    # act: pass the lookup explicitly so the parser does not build its own
    result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map)

    # assert
    assert len(result) == 3
    assert all(isinstance(item, PeabodyProperty) for item in result)
@pytest.fixture
def asset_condition_factory():
def _factory(full_address: str) -> PeabodyAssetCondition:
@ -165,6 +193,7 @@ def asset_condition_factory():
return _factory
@pytest.mark.parametrize(
"full_address, expected_block_level",
[
@ -175,7 +204,7 @@ def asset_condition_factory():
("81A-B GORE ROAD LONDON", True),
("73 & 74 HARVEST COURT ST. ALBANS", True),
("25 HAVERSHAM COURT GREENFORD", False),
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False)
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False),
],
)
def test_peabody_asset_is_block_level(
@ -187,4 +216,4 @@ def test_peabody_asset_is_block_level(
asset_condition = asset_condition_factory(full_address)
# act + assert
assert asset_condition.is_block_level == expected_block_level
assert asset_condition.is_block_level == expected_block_level