mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Get UPRN from the reference-to-UPRN CSV file. The parsing test fails for now and still needs to be fixed.
This commit is contained in:
parent
d83d73c129
commit
d26ea2863b
3 changed files with 36 additions and 17 deletions
|
|
@ -20,9 +20,9 @@ def main() -> None:
|
||||||
/ "peabody"
|
/ "peabody"
|
||||||
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
|
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
|
||||||
)
|
)
|
||||||
# filepaths = [lbwf_path, peabody_path]
|
filepaths = [lbwf_path, peabody_path]
|
||||||
# filepaths = [lbwf_path]
|
# filepaths = [lbwf_path]
|
||||||
filepaths = [peabody_path]
|
# filepaths = [peabody_path]
|
||||||
|
|
||||||
for fp in filepaths:
|
for fp in filepaths:
|
||||||
with fp.open("rb") as f:
|
with fp.open("rb") as f:
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import BinaryIO, Any
|
from typing import BinaryIO, Any
|
||||||
|
|
||||||
|
|
||||||
class Parser(ABC):
|
class Parser(ABC):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def parse(self, file_stream: BinaryIO) -> Any:
|
def parse(self, file_stream: BinaryIO) -> Any:
|
||||||
pass
|
pass
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,5 @@
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict
|
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict
|
||||||
from openpyxl import Workbook, load_workbook
|
from openpyxl import Workbook, load_workbook
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
@ -15,17 +17,34 @@ logger = setup_logger()
|
||||||
class PeabodyParser(Parser):
|
class PeabodyParser(Parser):
|
||||||
def parse(self, file_stream: BinaryIO) -> Any:
|
def parse(self, file_stream: BinaryIO) -> Any:
|
||||||
wb: Workbook = load_workbook(file_stream)
|
wb: Workbook = load_workbook(file_stream)
|
||||||
address_to_uprn_map: Dict[str, int] = (
|
location_ref_to_uprn_map: Dict[str, int] = (
|
||||||
PeabodyParser._generate_address_to_uprn_dict(wb)
|
PeabodyParser._build_location_ref_to_uprn_map()
|
||||||
)
|
)
|
||||||
|
|
||||||
assets = self._parse_assets(wb)
|
assets = PeabodyParser._parse_assets(wb)
|
||||||
|
|
||||||
return self._group_assets_into_properties(
|
return PeabodyParser._group_assets_into_properties(
|
||||||
assets=assets,
|
assets=assets,
|
||||||
address_to_uprn_map=address_to_uprn_map,
|
location_ref_to_uprn_map=location_ref_to_uprn_map,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _build_location_ref_to_uprn_map() -> Dict[str, int]:
|
||||||
|
location_ref_to_uprn_filepath: Path = (
|
||||||
|
Path(__file__).resolve().parents[1]
|
||||||
|
/ "sample_data"
|
||||||
|
/ "peabody"
|
||||||
|
/ "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
|
||||||
|
)
|
||||||
|
location_ref_to_uprn_map: Dict[str, int] = {}
|
||||||
|
|
||||||
|
with location_ref_to_uprn_filepath.open(newline="") as f:
|
||||||
|
reader: Any = csv.DictReader(f)
|
||||||
|
for row in reader:
|
||||||
|
location_ref_to_uprn_map[row["reference"]] = int(row["out_uprn"])
|
||||||
|
|
||||||
|
return location_ref_to_uprn_map
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
|
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
|
||||||
assets_sheet = wb["Survey Records - D & Lower"]
|
assets_sheet = wb["Survey Records - D & Lower"]
|
||||||
|
|
@ -54,27 +73,26 @@ class PeabodyParser(Parser):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _group_assets_into_properties(
|
def _group_assets_into_properties(
|
||||||
assets: List[PeabodyAssetCondition],
|
assets: List[PeabodyAssetCondition],
|
||||||
address_to_uprn_map: Dict[str, int],
|
location_ref_to_uprn_map: Dict[str, int],
|
||||||
) -> List[PeabodyProperty]:
|
) -> List[PeabodyProperty]:
|
||||||
assets_by_address: DefaultDict[str, List[PeabodyAssetCondition]] = defaultdict(
|
assets_by_location_reference: DefaultDict[str, List[PeabodyAssetCondition]] = (
|
||||||
list
|
defaultdict(list)
|
||||||
)
|
)
|
||||||
|
|
||||||
for asset in assets:
|
for asset in assets:
|
||||||
if asset.full_address is None:
|
if asset.lo_reference is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
address = asset.full_address.strip()
|
assets_by_location_reference[asset.lo_reference].append(asset)
|
||||||
assets_by_address[address].append(asset)
|
|
||||||
|
|
||||||
properties: List[PeabodyProperty] = []
|
properties: List[PeabodyProperty] = []
|
||||||
|
|
||||||
for address, grouped_assets in assets_by_address.items():
|
for location_ref, grouped_assets in assets_by_location_reference.items():
|
||||||
|
|
||||||
uprn = address_to_uprn_map.get(address)
|
uprn = location_ref_to_uprn_map.get(location_ref)
|
||||||
|
|
||||||
if uprn is None:
|
if uprn is None:
|
||||||
logger.warning(f"No UPRN found for address: {address}")
|
logger.warning(f"No UPRN found for Location Reference: {location_ref}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
properties.append(
|
properties.append(
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue