mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
43 lines
1 KiB
Python
43 lines
1 KiB
Python
import re
|
|
from typing import Dict, List
|
|
from openpyxl import load_workbook
|
|
|
|
from backend.pashub_fetcher.job import Job
|
|
|
|
|
|
def extract_jobs(filepath: str) -> List[Job]:
    """Read job entries from the ``filtered`` sheet of an Excel workbook.

    Column titles are taken from row 3 of the sheet; every later row is
    treated as data. A row yields a job only when both its ``Name`` and
    ``PasHub Link`` cells are non-empty and the link contains a
    ``/jobs/<id>/`` path segment, where ``<id>`` consists of hex digits and
    hyphens.

    Args:
        filepath: Path of the workbook, passed to ``openpyxl.load_workbook``.

    Returns:
        One dict per usable row, with the keys ``"id"`` (the identifier
        captured from the link) and ``"address"`` (the ``Name`` cell as a
        string). Rows that are incomplete or whose link does not match are
        skipped silently.
        NOTE(review): ``Job`` is declared elsewhere; this assumes it is a
        dict-compatible type with exactly these keys — confirm.

    Raises:
        KeyError: If the workbook has no ``filtered`` sheet, or the header
            row lacks a ``Name`` or ``PasHub Link`` column.
    """
    HEADER_ROW = 3
    # Compiled once here rather than looked up again for every data row.
    job_id_pattern = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")

    wb = load_workbook(filepath, data_only=True)
    ws = wb["filtered"]

    # Map each header title (whitespace-trimmed) to its 1-based column index.
    headers: Dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        raw_title = ws.cell(row=HEADER_ROW, column=col).value
        # Test for None *before* converting to str: str(None) is the truthy
        # string "None", which would register blank cells as a real column.
        if raw_title is None:
            continue
        title = str(raw_title).strip()
        if title:
            headers[title] = col

    # Fail with a message naming the missing column(s); still a KeyError, so
    # callers that catch the original lookup failure keep working.
    missing = [title for title in ("Name", "PasHub Link") if title not in headers]
    if missing:
        raise KeyError(
            f"missing column(s) {missing} in row {HEADER_ROW} of sheet 'filtered'"
        )

    name_col = headers["Name"]
    link_col = headers["PasHub Link"]

    jobs: List[Job] = []
    for row in range(HEADER_ROW + 1, ws.max_row + 1):
        name = ws.cell(row=row, column=name_col).value
        link = ws.cell(row=row, column=link_col).value

        # Both cells are required; blank or partially filled rows are skipped.
        if not name or not link:
            continue

        # The pattern requires a "/" after the identifier, so a link that
        # ends directly on the id does not match and the row is skipped.
        match = job_id_pattern.search(str(link))
        if not match:
            continue

        jobs.append(
            {
                "id": match.group(1),
                "address": str(name),
            }
        )

    return jobs
|