Get UPRN from the reference-to-UPRN mapping file. The parsing test fails for now and still needs to be fixed.

This commit is contained in:
Daniel Roth 2026-01-29 17:19:03 +00:00
parent d83d73c129
commit d26ea2863b
3 changed files with 36 additions and 17 deletions

View file

@ -20,9 +20,9 @@ def main() -> None:
/ "peabody" / "peabody"
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx" / "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
) )
# filepaths = [lbwf_path, peabody_path] filepaths = [lbwf_path, peabody_path]
# filepaths = [lbwf_path] # filepaths = [lbwf_path]
filepaths = [peabody_path] # filepaths = [peabody_path]
for fp in filepaths: for fp in filepaths:
with fp.open("rb") as f: with fp.open("rb") as f:

View file

@ -1,8 +1,9 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import BinaryIO, Any from typing import BinaryIO, Any
class Parser(ABC): class Parser(ABC):
@abstractmethod @abstractmethod
def parse(self, file_stream: BinaryIO) -> Any: def parse(self, file_stream: BinaryIO) -> Any:
pass pass

View file

@ -1,3 +1,5 @@
import csv
from pathlib import Path
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict
from openpyxl import Workbook, load_workbook from openpyxl import Workbook, load_workbook
from collections import defaultdict from collections import defaultdict
@ -15,17 +17,34 @@ logger = setup_logger()
class PeabodyParser(Parser): class PeabodyParser(Parser):
def parse(self, file_stream: BinaryIO) -> Any: def parse(self, file_stream: BinaryIO) -> Any:
wb: Workbook = load_workbook(file_stream) wb: Workbook = load_workbook(file_stream)
address_to_uprn_map: Dict[str, int] = ( location_ref_to_uprn_map: Dict[str, int] = (
PeabodyParser._generate_address_to_uprn_dict(wb) PeabodyParser._build_location_ref_to_uprn_map()
) )
assets = self._parse_assets(wb) assets = PeabodyParser._parse_assets(wb)
return self._group_assets_into_properties( return PeabodyParser._group_assets_into_properties(
assets=assets, assets=assets,
address_to_uprn_map=address_to_uprn_map, location_ref_to_uprn_map=location_ref_to_uprn_map,
) )
@staticmethod
def _build_location_ref_to_uprn_map(
    csv_path: "Path | None" = None,
) -> Dict[str, int]:
    """Load the location-reference -> UPRN lookup from a CSV file.

    The CSV must have a header row containing the columns ``reference``
    and ``out_uprn``. Each data row contributes one entry mapping the
    ``reference`` value (used verbatim as the key) to ``out_uprn``
    converted to ``int``. If a reference occurs more than once, the last
    row wins.

    Args:
        csv_path: Optional path to the mapping CSV. When omitted, the
            bundled ``sample_data/peabody`` file located relative to this
            module is used, so existing zero-argument callers are
            unaffected.

    Returns:
        A dict mapping location reference to UPRN.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        KeyError: If the ``reference`` or ``out_uprn`` column is missing.
        ValueError: If a non-blank ``out_uprn`` value is not an integer.
    """
    if csv_path is None:
        csv_path = (
            Path(__file__).resolve().parents[1]
            / "sample_data"
            / "peabody"
            / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
        )

    location_ref_to_uprn_map: Dict[str, int] = {}
    # An explicit encoding keeps decoding independent of the platform
    # default; "utf-8-sig" also strips a leading BOM, which would
    # otherwise turn the first header into "\ufeffreference".
    # NOTE(review): assumes the file is UTF-8 (with or without BOM) - confirm.
    with Path(csv_path).open(newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            raw_uprn = row["out_uprn"]
            # Blank cells (and short rows, which DictReader fills with
            # None) have no UPRN. Leave them out of the map so callers
            # see a missing key instead of this loader crashing in int().
            if raw_uprn is None or not raw_uprn.strip():
                continue
            location_ref_to_uprn_map[row["reference"]] = int(raw_uprn)
    return location_ref_to_uprn_map
@staticmethod @staticmethod
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]: def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
assets_sheet = wb["Survey Records - D & Lower"] assets_sheet = wb["Survey Records - D & Lower"]
@ -54,27 +73,26 @@ class PeabodyParser(Parser):
@staticmethod @staticmethod
def _group_assets_into_properties( def _group_assets_into_properties(
assets: List[PeabodyAssetCondition], assets: List[PeabodyAssetCondition],
address_to_uprn_map: Dict[str, int], location_ref_to_uprn_map: Dict[str, int],
) -> List[PeabodyProperty]: ) -> List[PeabodyProperty]:
assets_by_address: DefaultDict[str, List[PeabodyAssetCondition]] = defaultdict( assets_by_location_reference: DefaultDict[str, List[PeabodyAssetCondition]] = (
list defaultdict(list)
) )
for asset in assets: for asset in assets:
if asset.full_address is None: if asset.lo_reference is None:
continue continue
address = asset.full_address.strip() assets_by_location_reference[asset.lo_reference].append(asset)
assets_by_address[address].append(asset)
properties: List[PeabodyProperty] = [] properties: List[PeabodyProperty] = []
for address, grouped_assets in assets_by_address.items(): for location_ref, grouped_assets in assets_by_location_reference.items():
uprn = address_to_uprn_map.get(address) uprn = location_ref_to_uprn_map.get(location_ref)
if uprn is None: if uprn is None:
logger.warning(f"No UPRN found for address: {address}") logger.warning(f"No UPRN found for Location Reference: {location_ref}")
continue continue
properties.append( properties.append(