# Source: Model/backend/pashub_fetcher/spreadsheet.py (Python; listed as 43 lines, 1 KiB)

import re
from typing import Dict, List
from openpyxl import load_workbook
from backend.pashub_fetcher.job import Job
def extract_jobs(filepath: str) -> List[Job]:
    """Read job entries from the "filtered" sheet of an Excel workbook.

    The sheet is expected to carry its column headers on row 3. The columns
    titled "Name" and "PasHub Link" are located by header text, and every
    following row that has both values and whose link contains a
    ``/jobs/<id>/`` segment yields one job entry.

    Args:
        filepath: Path of the workbook to open. It is loaded with
            ``data_only=True``.

    Returns:
        A list of dicts, each with:
            "id": the hex/dash identifier captured from the link's
                ``/jobs/<id>/`` segment.
            "address": the "Name" cell converted to ``str``.
        Rows with an empty name or link, or with a link that does not match
        the pattern, are skipped.
        NOTE(review): the entries are plain dicts; presumably ``Job`` is a
        dict-compatible type (e.g. a TypedDict) -- confirm against
        ``backend.pashub_fetcher.job``.

    Raises:
        KeyError: If the "Name" or "PasHub Link" header is not present on the
            header row. Looking up the "filtered" sheet is delegated to
            openpyxl and fails the same way when the sheet does not exist.
    """
    HEADER_ROW = 3
    # Compiled once; the trailing slash after the id is required to match.
    job_link_pattern = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")

    wb = load_workbook(filepath, data_only=True)
    ws = wb["filtered"]

    # Map header text -> 1-based column index.
    headers: Dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        raw = ws.cell(row=HEADER_ROW, column=col).value
        # Check for None *before* converting: str(None) is the truthy
        # string "None", which would register a bogus header.
        if raw is None:
            continue
        label = str(raw).strip()
        if label:
            headers[label] = col

    missing = [key for key in ("Name", "PasHub Link") if key not in headers]
    if missing:
        raise KeyError(
            f"Missing required header(s) on row {HEADER_ROW}: "
            + ", ".join(missing)
        )
    name_col = headers["Name"]
    link_col = headers["PasHub Link"]

    jobs: List[Job] = []
    for row in range(HEADER_ROW + 1, ws.max_row + 1):
        name = ws.cell(row=row, column=name_col).value
        link = ws.cell(row=row, column=link_col).value
        if not name or not link:
            continue

        match = job_link_pattern.search(str(link))
        if not match:
            continue

        jobs.append(
            {
                "id": match.group(1),
                "address": str(name),
            }
        )
    return jobs