Model/backend/ecmk_fetcher/address_list.py

68 lines
1.9 KiB
Python

import re
from dataclasses import dataclass
from typing import Any, Dict, Optional
from openpyxl import Workbook, load_workbook
from openpyxl.worksheet.worksheet import Worksheet
@dataclass
class PropertyRow:
    """One property read from the spreadsheet: where it sits and its address."""

    # Worksheet row number (1-based) the property was read from.
    row_index: int
    # Short address string derived from the row's "deal name" cell.
    address: str
def extract_addresses_from_spreadsheet(
    filepath: str,
    sheet_name: str = "Southern RA-Lite Programme 3103",
) -> Dict[str, PropertyRow]:
    """Read property IDs and short addresses from a worksheet.

    The first row of the sheet is treated as the header. The columns whose
    header text (trimmed, case-insensitive) is ``id`` and ``deal name`` are
    located, then every following row that has both values is turned into a
    ``PropertyRow`` keyed by the stringified, trimmed ID.

    Args:
        filepath: Path of the workbook to open with openpyxl.
        sheet_name: Name of the worksheet to read. Defaults to the sheet the
            original implementation hard-coded.

    Returns:
        Mapping of property ID to its ``PropertyRow``. When the same ID
        appears on several rows, the last row wins.

    Raises:
        ValueError: If the header row lacks an ``id`` or ``deal name`` column.
        KeyError: If the workbook has no worksheet called ``sheet_name``
            (raised by openpyxl's ``wb[...]`` lookup).
    """
    wb: Workbook = load_workbook(filepath, data_only=True)
    ws: Worksheet = wb[sheet_name]
    header_row: int = 1
    id_col: Optional[int] = None
    deal_name_col: Optional[int] = None

    # Locate the required columns by header text. If a header is repeated,
    # the right-most occurrence is used.
    for col in range(1, ws.max_column + 1):
        raw_value: Any = ws.cell(row=header_row, column=col).value
        header: str = str(raw_value).strip().lower() if raw_value else ""
        if header == "id":
            id_col = col
        elif header == "deal name":
            deal_name_col = col

    if id_col is None or deal_name_col is None:
        missing = [
            name
            for name, found in (("id", id_col), ("deal name", deal_name_col))
            if found is None
        ]
        # ValueError is still an Exception, so existing broad handlers keep
        # working, but the message now says which header is absent.
        raise ValueError(f"Missing required columns: {', '.join(missing)}")

    properties: Dict[str, PropertyRow] = {}
    for row in range(header_row + 1, ws.max_row + 1):
        id_val: Any = ws.cell(row=row, column=id_col).value
        deal_name: Any = ws.cell(row=row, column=deal_name_col).value
        # Only genuinely empty cells are skipped; a numeric ID of 0 is valid
        # data and must not be dropped by a truthiness test.
        if id_val is None or deal_name is None:
            continue

        property_id: str = str(id_val).strip()
        deal_text: str = str(deal_name).strip()
        # Whitespace-only cells carry no usable ID or address.
        if not property_id or not deal_text:
            continue

        properties[property_id] = PropertyRow(
            row_index=row,
            address=extract_succinct_address(deal_text),
        )
    return properties
# Postcode pattern (letters, digit, optional letter/digit, optional spaces,
# digit, two letters); compiled once because it is applied to every row.
_POSTCODE_RE = re.compile(
    r"\b([A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2})\b",
    re.IGNORECASE,
)


def extract_succinct_address(deal_name: str) -> str:
    """Reduce a deal name to "<first address segment> <POSTCODE>".

    Only the text before the first ``|`` is considered. The first
    comma-separated segment of that text is kept, and the first postcode
    found anywhere in that text is upper-cased and appended to it.

    Args:
        deal_name: Raw deal-name text.

    Returns:
        The first segment followed by the upper-cased postcode. If no
        postcode is found, just the first segment. If the postcode already
        sits inside the first segment it is upper-cased in place rather than
        appended a second time.
    """
    left_part = deal_name.split("|")[0].strip()
    # `head` is an exact prefix of `left_part`, so match offsets computed on
    # `left_part` can be compared against and applied to `head` directly.
    head = left_part.partition(",")[0]
    first_part = head.strip()

    postcode_match = _POSTCODE_RE.search(left_part)
    if postcode_match is None:
        return first_part

    postcode = postcode_match.group(1).upper()
    if postcode_match.end() <= len(head):
        # The postcode is already part of the first segment (no comma before
        # it); appending it again would duplicate it.
        in_place = (
            head[: postcode_match.start()] + postcode + head[postcode_match.end():]
        )
        return in_place.strip()

    # strip() avoids a leading space when the first segment is empty.
    return f"{first_part} {postcode}".strip()