# Mirror of https://github.com/Hestia-Homes/Model.git
# Synced 2026-06-08 11:17:27 +00:00 (74 lines, 2.2 KiB, Python)
import re
|
|
from dataclasses import dataclass
|
|
from typing import Any, Dict, Optional
|
|
from openpyxl import Workbook, load_workbook
|
|
from openpyxl.worksheet.worksheet import Worksheet
|
|
|
|
|
|
@dataclass
class PropertyRow:
    """One property record read from a data row of the spreadsheet."""

    # Worksheet row number the record was read from.
    row_index: int
    # Short address string built from the row's "deal name" cell.
    address: str
    # Value of the row's "associated listing ids" cell.
    listing_id: str
|
|
|
|
|
|
def extract_addresses_from_spreadsheet(
    filepath: str,
    sheet_name: str = "Southern RA-Lite Programme 3103",
) -> Dict[str, PropertyRow]:
    """Read property rows from a worksheet, keyed by their id.

    The first row of the sheet is treated as the header. Three columns are
    located by header text (compared stripped and lower-cased): ``id``,
    ``deal name`` and ``associated listing ids``. Every later row that has a
    truthy value in all three columns becomes a ``PropertyRow``.

    Args:
        filepath: Path of the workbook, passed to ``load_workbook``.
        sheet_name: Name of the worksheet to read. Defaults to the sheet the
            function was originally hard-wired to.

    Returns:
        A dict mapping the stripped string form of each row's id to its
        ``PropertyRow``. If the same id occurs on several rows, the last
        row wins.

    Raises:
        KeyError: If the workbook has no worksheet called ``sheet_name``.
        ValueError: If any of the three required header cells is missing.
    """
    wb: Workbook = load_workbook(filepath, data_only=True)
    ws: Worksheet = wb[sheet_name]

    header_row: int = 1
    id_col: Optional[int] = None
    deal_name_col: Optional[int] = None
    listing_id_col: Optional[int] = None

    # Locate the required columns by their header text.
    for col in range(1, ws.max_column + 1):
        raw_value: Any = ws.cell(row=header_row, column=col).value
        value: str = str(raw_value).strip().lower() if raw_value else ""

        if value == "id":
            id_col = col
        elif value == "deal name":
            deal_name_col = col
        elif value == "associated listing ids":
            listing_id_col = col

    if id_col is None or deal_name_col is None or listing_id_col is None:
        missing = [
            header
            for header, found in (
                ("id", id_col),
                ("deal name", deal_name_col),
                ("associated listing ids", listing_id_col),
            )
            if found is None
        ]
        # ValueError is still caught by callers using `except Exception`.
        raise ValueError("Missing required columns: " + ", ".join(missing))

    properties: Dict[str, PropertyRow] = {}

    for row in range(header_row + 1, ws.max_row + 1):
        id_val: Any = ws.cell(row=row, column=id_col).value
        deal_name: Any = ws.cell(row=row, column=deal_name_col).value
        listing_id: Any = ws.cell(row=row, column=listing_id_col).value

        # Skip rows that lack any of the three values.
        if not id_val or not deal_name or not listing_id:
            continue

        property_id: str = str(id_val).strip()

        properties[property_id] = PropertyRow(
            row_index=row,
            address=extract_succinct_address(str(deal_name)),
            # Cells may hold numbers; PropertyRow.listing_id is declared str,
            # so normalise it the same way as the id.
            listing_id=str(listing_id).strip(),
        )

    return properties
|
|
|
|
|
|
# UK-style postcode: outward code, optional whitespace, inward code.
_UK_POSTCODE_RE = re.compile(
    r"\b([A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2})\b",
    re.IGNORECASE,
)


def extract_succinct_address(deal_name: str) -> str:
    """Reduce a deal name to "<first address segment> <POSTCODE>".

    Only the text before the first ``|`` is considered. The first
    comma-separated segment of that text is kept, and the first postcode
    found anywhere in the text is appended in upper case. If the postcode
    already sits inside the first segment it is not repeated.

    Args:
        deal_name: Raw deal name text.

    Returns:
        The first segment followed by the upper-cased postcode, or just the
        first segment when no postcode is found.
    """
    left_part = deal_name.split("|")[0].strip()
    first_part = left_part.split(",")[0].strip()

    postcode_match: Optional[re.Match[str]] = _UK_POSTCODE_RE.search(left_part)
    if postcode_match is None:
        return first_part

    postcode = postcode_match.group(1).upper()

    # When there are no commas before the postcode, the first segment already
    # contains it; cut that copy out so the result does not end up with the
    # postcode twice.
    inline_match = _UK_POSTCODE_RE.search(first_part)
    if inline_match is not None:
        remainder = (
            first_part[: inline_match.start()] + first_part[inline_match.end():]
        )
        first_part = " ".join(remainder.split())

    return f"{first_part} {postcode}" if first_part else postcode
|