import re
from typing import Dict, List

from openpyxl import load_workbook

from backend.pashub_fetcher.job import Job

# Compiled once at import time. Captures the hex/dash identifier located
# between "/jobs/" and the next "/" in a link.
# NOTE(review): a link that ends right after the id (no trailing "/") does
# not match and its row is skipped — confirm this is intended.
_JOB_ID_PATTERN = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")


def extract_jobs(filepath: str) -> List[Job]:
    """Read job entries from the "filtered" sheet of an Excel workbook.

    Row 3 of the sheet is treated as the header row. The columns titled
    "Name" and "PasHub Link" are located by header text, and every
    following row that has both values and whose link contains
    ``/jobs/<id>/`` yields one entry.

    Args:
        filepath: Path to the workbook, passed to ``load_workbook``.

    Returns:
        A list of ``{"id": ..., "address": ...}`` mappings, in sheet order.
        ``id`` is the identifier captured from the link and ``address`` is
        the "Name" cell converted to ``str``.

    Raises:
        KeyError: If the "Name" or "PasHub Link" header is not present in
            the header row (a missing "filtered" sheet also surfaces as
            the error raised by the workbook's sheet lookup).
    """
    HEADER_ROW = 3

    wb = load_workbook(filepath, data_only=True)
    ws = wb["filtered"]

    # Map stripped header text -> 1-based column index.
    headers: Dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        raw = ws.cell(row=HEADER_ROW, column=col).value
        # Check for None *before* stringifying: str(None) is the truthy
        # string "None", which would otherwise be stored as a header.
        if raw is None:
            continue
        title = str(raw).strip()
        if title:
            headers[title] = col

    missing = [h for h in ("Name", "PasHub Link") if h not in headers]
    if missing:
        # Same exception type as a plain dict lookup, but with context.
        raise KeyError(
            f"Missing header column(s) in row {HEADER_ROW}: {', '.join(missing)}"
        )
    name_col = headers["Name"]
    link_col = headers["PasHub Link"]

    # NOTE(review): entries are appended as plain dicts although the return
    # annotation is List[Job]; presumably Job is a TypedDict — confirm.
    jobs: List[Job] = []
    for row in range(HEADER_ROW + 1, ws.max_row + 1):
        name = ws.cell(row=row, column=name_col).value
        link = ws.cell(row=row, column=link_col).value
        # Skip rows where either cell is empty.
        if not name or not link:
            continue

        match = _JOB_ID_PATTERN.search(str(link))
        if not match:
            # Link present but not in the expected /jobs/<id>/ form.
            continue

        jobs.append(
            {
                "id": match.group(1),
                "address": str(name),
            }
        )

    return jobs