From d26ea2863b86010c8736a0ff266cbe4ec2c40c88 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 29 Jan 2026 17:19:03 +0000 Subject: [PATCH] get uprn from reference:uprn file. parsing test fails for now, need to fix --- backend/condition/local_runner.py | 4 +- backend/condition/parsing/parser.py | 3 +- backend/condition/parsing/peabody_parser.py | 46 ++++++++++++++------- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/backend/condition/local_runner.py b/backend/condition/local_runner.py index fc6516cc..e39d38c7 100644 --- a/backend/condition/local_runner.py +++ b/backend/condition/local_runner.py @@ -20,9 +20,9 @@ def main() -> None: / "peabody" / "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx" ) - # filepaths = [lbwf_path, peabody_path] + filepaths = [lbwf_path, peabody_path] # filepaths = [lbwf_path] - filepaths = [peabody_path] + # filepaths = [peabody_path] for fp in filepaths: with fp.open("rb") as f: diff --git a/backend/condition/parsing/parser.py b/backend/condition/parsing/parser.py index 105fda36..8146789f 100644 --- a/backend/condition/parsing/parser.py +++ b/backend/condition/parsing/parser.py @@ -1,8 +1,9 @@ from abc import ABC, abstractmethod from typing import BinaryIO, Any + class Parser(ABC): @abstractmethod def parse(self, file_stream: BinaryIO) -> Any: - pass \ No newline at end of file + pass diff --git a/backend/condition/parsing/peabody_parser.py b/backend/condition/parsing/peabody_parser.py index 423c0cda..0ffcb1b9 100644 --- a/backend/condition/parsing/peabody_parser.py +++ b/backend/condition/parsing/peabody_parser.py @@ -1,3 +1,5 @@ +import csv +from pathlib import Path from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict from openpyxl import Workbook, load_workbook from collections import defaultdict @@ -15,17 +17,34 @@ logger = setup_logger() class PeabodyParser(Parser): def parse(self, file_stream: BinaryIO) -> Any: wb: Workbook = load_workbook(file_stream) - address_to_uprn_map: Dict[str, int] = ( - PeabodyParser._generate_address_to_uprn_dict(wb) + location_ref_to_uprn_map: Dict[str, int] = ( + PeabodyParser._build_location_ref_to_uprn_map() ) - assets = self._parse_assets(wb) + assets = PeabodyParser._parse_assets(wb) - return self._group_assets_into_properties( + return PeabodyParser._group_assets_into_properties( assets=assets, - address_to_uprn_map=address_to_uprn_map, + location_ref_to_uprn_map=location_ref_to_uprn_map, ) + @staticmethod + def _build_location_ref_to_uprn_map() -> Dict[str, int]: + location_ref_to_uprn_filepath: Path = ( + Path(__file__).resolve().parents[1] + / "sample_data" + / "peabody" + / "PeabodyPropertymatched_Dec25_propref_UPRN.csv" + ) + location_ref_to_uprn_map: Dict[str, int] = {} + + with location_ref_to_uprn_filepath.open(newline="") as f: + reader: Any = csv.DictReader(f) + for row in reader: + location_ref_to_uprn_map[row["reference"]] = int(row["out_uprn"]) + + return location_ref_to_uprn_map + @staticmethod def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]: assets_sheet = wb["Survey Records - D & Lower"] @@ -54,27 +73,26 @@ class PeabodyParser(Parser): @staticmethod def _group_assets_into_properties( assets: List[PeabodyAssetCondition], - address_to_uprn_map: Dict[str, int], + location_ref_to_uprn_map: Dict[str, int], ) -> List[PeabodyProperty]: - assets_by_address: DefaultDict[str, List[PeabodyAssetCondition]] = defaultdict( - list + assets_by_location_reference: DefaultDict[str, List[PeabodyAssetCondition]] = ( + defaultdict(list) ) for asset in assets: - if asset.full_address is None: + if asset.lo_reference is None: continue - address = asset.full_address.strip() - assets_by_address[address].append(asset) + assets_by_location_reference[asset.lo_reference].append(asset) properties: List[PeabodyProperty] = [] - for address, grouped_assets in assets_by_address.items(): + for location_ref, grouped_assets in assets_by_location_reference.items(): - uprn = address_to_uprn_map.get(address) + uprn = location_ref_to_uprn_map.get(location_ref) if uprn is None: - logger.warning(f"No UPRN found for address: {address}") + logger.warning(f"No UPRN found for Location Reference: {location_ref}") continue properties.append(