diff --git a/backend/pashub_fetcher/Watford_Warm_Homes_Wave_3_RA Downloads .xlsx b/backend/pashub_fetcher/Watford_Warm_Homes_Wave_3_RA Downloads .xlsx
new file mode 100644
index 00000000..6e41291b
Binary files /dev/null and b/backend/pashub_fetcher/Watford_Warm_Homes_Wave_3_RA Downloads .xlsx differ
diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile
index fd66f53e..a4d322ab 100644
--- a/backend/pashub_fetcher/handler/Dockerfile
+++ b/backend/pashub_fetcher/handler/Dockerfile
@@ -5,7 +5,7 @@ ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest
 RUN chmod +x /usr/local/bin/aws-lambda-rie
 
 # Install Lambda runtime client
-RUN pip install awslambdaric playwright==1.58.0 requests msal
+RUN pip install awslambdaric playwright==1.58.0 requests msal openpyxl
 
 # Set working directory (Lambda task root)
 WORKDIR /var/task
diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py
index f99dd688..d9941903 100644
--- a/backend/pashub_fetcher/handler/handler.py
+++ b/backend/pashub_fetcher/handler/handler.py
@@ -1,5 +1,9 @@
+import os
+import re
 import time
-from typing import Any, List, Mapping
+from typing import Any, Dict, List, Mapping
+
+from openpyxl import load_workbook
 
 from backend.pashub_fetcher.job import Job
 from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
@@ -12,9 +16,73 @@ from utils.sharepoint.domna_sites import DomnaSites
 logger = setup_logger()
 
 
+def extract_jobs(filepath: str) -> List[Job]:
+    """Read Pashub jobs from the Watford Wave 3 spreadsheet.
+
+    Columns are located by their header title, so they may be reordered in
+    the sheet. Rows without a name or link, or whose link has no
+    ``/jobs/ID/`` segment, are skipped.
+
+    Args:
+        filepath: Path to the ``.xlsx`` workbook.
+
+    Returns:
+        One ``{"id", "address"}`` entry per usable row, in row order.
+
+    Raises:
+        KeyError: If the "Name" or "Pashub Link" header is not found.
+    """
+    wb = load_workbook(filepath, data_only=True)
+    ws = wb["watford warm homes (wave 3) mai"]
+
+    HEADER_ROW = 3
+
+    headers: Dict[str, int] = {}
+    for col in range(1, ws.max_column + 1):
+        value = ws.cell(row=HEADER_ROW, column=col).value
+        # Check for None before str(): str(None) is the truthy "None" and
+        # would register a bogus header for every empty cell.
+        if value is None:
+            continue
+        title = str(value).strip()
+        if title:
+            headers[title] = col
+
+    name_col = headers["Name"]
+    link_col = headers["Pashub Link"]
+
+    # The job id is the path segment that follows /jobs/ in the link.
+    job_id_pattern = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")
+
+    jobs: List[Job] = []
+
+    for row in range(HEADER_ROW + 1, ws.max_row + 1):
+        name = ws.cell(row=row, column=name_col).value
+        link = ws.cell(row=row, column=link_col).value
+
+        if not name or not link:
+            continue
+
+        match = job_id_pattern.search(str(link))
+        if not match:
+            continue
+
+        jobs.append({"id": match.group(1), "address": str(name)})
+
+    return jobs
+
+
 def handler(event: Mapping[str, Any], context: Any) -> None:
-    pas_hub_email = "random@test.com"
-    pas_hub_password = "my_fake_password"
+    BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+    filepath = os.path.join(BASE_DIR, "Watford_Warm_Homes_Wave_3_RA Downloads .xlsx")
+
+    jobs: List[Job] = extract_jobs(filepath)
+
+    logger.info("Successfully loaded jobs from spreadsheet")
+
+    # Credentials come from the environment; never commit real ones.
+    pas_hub_email = os.environ["PASHUB_EMAIL"]
+    pas_hub_password = os.environ["PASHUB_PASSWORD"]
 
     try:
         token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password)
@@ -28,17 +96,6 @@ def handler(event: Mapping[str, Any], context: Any) -> None:
         sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
     )
 
-    jobs: List[Job] = [
-        {
-            "id": "5abf6e27-e4c4-4ba8-b69d-9e34939e0002",
-            "address": "FLAT 11 Abbey View, Garsmouth Way, Watford, WD25 9DY",
-        },
-        {
-            "id": "047f4455-85e2-4293-97b1-6b460137d33e",
-            "address": "FLAT 14 Abbey View, Garsmouth Way, Watford, WD25 9DY",
-        },
-    ]  # TODO: get these from request body or spreadsheet
-
     sharepoint_client.makedir("Watford Test", "/JTK Test Folder")
 
     saved_file_paths: List[str] = []