"""Copy Pashub core evidence files for the jobs listed in a spreadsheet to SharePoint."""

import os
import re

# import time
from typing import Any, Dict, List, Mapping

from openpyxl import load_workbook

from backend.pashub_fetcher.job import Job
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites

logger = setup_logger()

# Captures the job id from a link containing "/jobs/<hex digits and dashes>/".
_JOB_ID_PATTERN = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")

# SharePoint destination: every job gets its own sub-folder below the project
# folder, which itself lives below the root folder.
_SHAREPOINT_ROOT = "/JTK Test Folder"
_SHAREPOINT_PROJECT_DIR = "Watford Test"
_SHAREPOINT_PROJECT_PATH = "/JTK Test Folder/Watford Test"


def extract_jobs(filepath: str) -> List[Job]:
    """Read the job list from the project spreadsheet.

    The header row is row 3 of the sheet "watford warm homes (wave 3) mai";
    the columns "Name" and "Pashub Link" are located by header text. Every
    following row that has both a name and a link containing
    ``/jobs/<id>/`` yields one job.

    Args:
        filepath: Path of the ``.xlsx`` workbook to read.

    Returns:
        One ``{"id": ..., "address": ...}`` entry per usable row, in sheet
        order. Rows with a missing name/link or a link without a job id are
        skipped.

    Raises:
        KeyError: If the sheet, or the "Name" / "Pashub Link" header, is
            not present.
    """
    wb = load_workbook(filepath, data_only=True)
    ws = wb["watford warm homes (wave 3) mai"]

    HEADER_ROW = 3

    headers: Dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        raw_header = ws.cell(row=HEADER_ROW, column=col).value
        # Test the raw value: str(None) is the truthy string "None", which
        # would otherwise register empty cells as a header called "None".
        if raw_header is None:
            continue
        header = str(raw_header).strip()
        if header:
            headers[header] = col

    name_col = headers["Name"]
    link_col = headers["Pashub Link"]

    jobs: List[Job] = []
    for row in range(HEADER_ROW + 1, ws.max_row + 1):
        name = ws.cell(row=row, column=name_col).value
        link = ws.cell(row=row, column=link_col).value
        if not name or not link:
            continue

        match = _JOB_ID_PATTERN.search(str(link))
        if not match:
            continue

        jobs.append({"id": match.group(1), "address": str(name)})

    return jobs


def _upload_job_files(
    sharepoint_client: DomnaSharepointClient, job: Job, job_files: List[str]
) -> None:
    """Create the job's SharePoint folder and upload each file into it."""
    sharepoint_client.makedir(job["address"], _SHAREPOINT_PROJECT_PATH)
    job_folder = f"{_SHAREPOINT_PROJECT_PATH}/{job['address']}"
    for file_path in job_files:
        sharepoint_client.upload_file(
            file_path,
            job_folder,
            os.path.basename(file_path),
        )


def handler(event: Mapping[str, Any], context: Any) -> None:
    """Fetch core evidence files for every spreadsheet job and upload them.

    Loads the jobs from the workbook located one directory above this
    file's directory, obtains a Pashub token, then for each job fetches its
    core evidence files and uploads them to
    "/JTK Test Folder/Watford Test/<address>" on SharePoint. If Pashub
    reports the token as unauthorized, the token is refreshed and the fetch
    is retried once; a second failure propagates.

    Args:
        event: Invocation payload; not read by this function.
        context: Invocation context; not read by this function.

    Raises:
        Exception: Whatever the token getter raises is logged and re-raised.
        UnauthorizedError: If the fetch is still unauthorized after one
            token refresh.
    """
    BASE_DIR = os.path.dirname(os.path.dirname(__file__))
    filepath = os.path.join(BASE_DIR, "Watford_Warm_Homes_Wave_3_RA Downloads .xlsx")

    jobs: List[Job] = extract_jobs(filepath)
    logger.info("Successfully loaded jobs from spreadsheet")

    # NOTE(review): placeholder credentials are hard-coded here; real values
    # should come from configuration / a secret store, never from source.
    pas_hub_email = "random@test.com"
    pas_hub_password = "my_fake_password"

    try:
        token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt / SystemExit
        # are not reported as authentication failures.
        logger.error("Error getting auth token from Pas Hub")
        raise
    logger.info("Token extracted successfully")

    pashub_client = PashubClient(token=token)
    sharepoint_client = DomnaSharepointClient(
        sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
    )
    sharepoint_client.makedir(_SHAREPOINT_PROJECT_DIR, _SHAREPOINT_ROOT)

    saved_file_paths: List[str] = []

    for job in jobs:
        try:
            job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(
                job["id"]
            )
        except UnauthorizedError:
            logger.warning("Token expired - refreshing")
            token = get_token_from_local_storage(pas_hub_email, pas_hub_password)
            pashub_client = PashubClient(token=token)
            # retry once
            job_files = pashub_client.get_core_evidence_files_by_job_id(job["id"])

        # Upload files to sharepoint. This runs after the try/except so that
        # files obtained on the retry path are uploaded too, not just counted.
        _upload_job_files(sharepoint_client, job, job_files)
        saved_file_paths.extend(job_files)

        # time.sleep(10)  # Simulate manual download

    print(f"saved {len(saved_file_paths)} files")


if __name__ == "__main__":
    event = {"Records": [{"body": "{}"}]}
    handler(event, None)