"""Fetch assessment reports from ECMK and push them to SharePoint and S3."""

import os
from contextlib import suppress
from typing import Dict

from playwright.sync_api import (
    sync_playwright,
    Locator,
    Page,
    Browser,
    BrowserContext,
)

from backend.app.db.functions.uploaded_files_functions import (
    get_uploaded_file_by_listing_type_and_source,
)
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
from backend.ecmk_fetcher.address_list import (
    PropertyRow,
    extract_addresses_from_spreadsheet,
)
from backend.ecmk_fetcher.browser import (
    attach_debug_listeners,
    download_with_retry,
    go_to_assessment_details,
    go_to_assessments,
    go_to_next_page,
    login,
)
from backend.ecmk_fetcher.reports import (
    REPORT_TYPES,
    FileDownloadButtonType,
    build_property_id,
    map_report_type_to_db_file_type,
)
from backend.ecmk_fetcher.upload import (
    upload_file_to_s3_and_update_db,
    upload_file_to_sharepoint,
)
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites

logger = setup_logger()

# CSS selector for the rows of the assessment table; used both to enumerate
# rows and to wait for the table after navigating back from a details page.
_ASSESSMENT_ROWS_SELECTOR = "#assessmentDatatable tbody tr"

# Only rows whose status cell equals this text are processed.
_TARGET_STATUS = "Submitted (not Lodged)"

# Rows whose (first name, last name) cells equal this pair are skipped.
_EXCLUDED_NAME = ("Oliver", "Stephens")


def _process_report(
    page: Page,
    report_type,
    property_row: PropertyRow,
    address: str,
    sharepoint_client: DomnaSharepointClient,
    sharepoint_base_path: str,
    s3_bucket: str,
) -> None:
    """Download one report for the open assessment and upload it.

    The report is skipped when its type has no DB file-type mapping, when the
    DB lookup reports a file already stored for this listing/type/ECMK source,
    or when the download yields no file path. The downloaded file is always
    removed from local disk afterwards, whether or not the uploads succeed.

    Args:
        page: Page currently showing the assessment details.
        report_type: One entry of ``REPORT_TYPES``.
        property_row: Spreadsheet row matched to this assessment.
        address: Address text from the assessment table (used for logging).
        sharepoint_client: Client used for the SharePoint upload.
        sharepoint_base_path: SharePoint folder under which files are placed.
        s3_bucket: Name of the destination S3 bucket.
    """
    hubspot_listing_id: str = property_row.listing_id

    try:
        db_file_type: FileTypeEnum = map_report_type_to_db_file_type(report_type)
    except ValueError:
        logger.error(f"Unknown report type {report_type}, skipping file")
        return

    if get_uploaded_file_by_listing_type_and_source(
        hubspot_listing_id=int(hubspot_listing_id),
        file_type=db_file_type,
        file_source=FileSourceEnum.ECMK,
    ):
        logger.debug("File already uploaded to s3, skipping")
        return

    file_path: str | None = download_with_retry(page, report_type)
    if not file_path:
        return

    logger.info(
        f"Successfully downloaded file {os.path.basename(file_path)} from ECMK"
    )

    try:
        if report_type == FileDownloadButtonType.RAW_XML.value:
            # TODO: extract data from XML and write to file
            pass
        else:
            upload_file_to_sharepoint(
                client=sharepoint_client,
                file_path=file_path,
                base_path=sharepoint_base_path,
                subpath=property_row.address,
            )
            logger.info(
                f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
            )

        # Upload to s3 and update db
        upload_file_to_s3_and_update_db(
            bucket=s3_bucket,
            file_path=file_path,
            hubspot_listing_id=hubspot_listing_id,
            file_type=db_file_type,
        )
    finally:
        # Remove the local download; a file that is already gone is not an error.
        with suppress(FileNotFoundError):
            os.remove(file_path)


def _process_row(
    page: Page,
    row: Locator,
    property_map: Dict[str, PropertyRow],
    sharepoint_client: DomnaSharepointClient,
    sharepoint_base_path: str,
    s3_bucket: str,
) -> None:
    """Process a single row of the assessment table.

    Rows are ignored when they carry the excluded name, when their status is
    not ``_TARGET_STATUS``, or when the property id built from address and
    postcode is absent from ``property_map``. For a matching row the details
    page is opened, every entry of ``REPORT_TYPES`` is handled, and the
    browser then navigates back and waits for the table rows to reappear.

    Args:
        page: Page showing the assessment table.
        row: Locator for the table row to inspect.
        property_map: Property id -> spreadsheet row.
        sharepoint_client: Client used for SharePoint uploads.
        sharepoint_base_path: SharePoint folder under which files are placed.
        s3_bucket: Name of the destination S3 bucket.
    """
    cells: Locator = row.locator("td")
    first_name: str = cells.nth(1).inner_text().strip()
    last_name: str = cells.nth(2).inner_text().strip()
    address: str = cells.nth(5).inner_text().strip()
    postcode: str = cells.nth(7).inner_text().strip()
    status: str = cells.nth(9).inner_text().strip()

    if (first_name, last_name) == _EXCLUDED_NAME:
        return
    if status != _TARGET_STATUS:
        return

    property_id: str = build_property_id(address, postcode)
    property_row: PropertyRow | None = property_map.get(property_id)
    if property_row is None:
        return

    logger.info(f"Match found for property {address}")

    go_to_assessment_details(page, row)

    for report_type in REPORT_TYPES:
        _process_report(
            page,
            report_type,
            property_row,
            address,
            sharepoint_client,
            sharepoint_base_path,
            s3_bucket,
        )

    # Return to the table and wait until its rows are present again.
    page.go_back()
    page.wait_for_selector(_ASSESSMENT_ROWS_SELECTOR, timeout=15000)


def run_job() -> None:
    """Run the ECMK fetch job end to end.

    Loads the property spreadsheet that sits next to this module, logs in,
    opens the assessments table and walks it page by page, handing each row
    to ``_process_row``. The browser context and the browser are closed on
    exit, including when an error occurs.

    Raises:
        Exception: ``"Row processing failed: ..."`` chained from any error
            raised while handling a row; the job stops at the first failure.
    """
    # NOTE: login() is called with empty credentials until these are wired up.
    username: str = ""  # TODO: get from github secrets
    password: str = ""

    property_list_file: str = (
        "hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
    )
    base_dir: str = os.path.dirname(__file__)
    filepath: str = os.path.join(base_dir, property_list_file)
    property_map: Dict[str, PropertyRow] = extract_addresses_from_spreadsheet(filepath)

    sharepoint_client: DomnaSharepointClient = DomnaSharepointClient(
        sharepoint_location=DomnaSites.PRIVATE_PAY
    )
    sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
    s3_bucket: str = "retrofit-energy-assessments-dev"

    with sync_playwright() as p:
        browser: Browser = p.chromium.launch(headless=True)
        try:
            context: BrowserContext = browser.new_context()
            try:
                page: Page = context.new_page()
                attach_debug_listeners(page)

                login(page, username, password)
                go_to_assessments(page)

                while True:
                    rows: Locator = page.locator(_ASSESSMENT_ROWS_SELECTOR)
                    row_count: int = rows.count()

                    # Rows are addressed by index because processing a row
                    # navigates away from the table and back again.
                    for i in range(row_count):
                        row: Locator = rows.nth(i)
                        try:
                            _process_row(
                                page,
                                row,
                                property_map,
                                sharepoint_client,
                                sharepoint_base_path,
                                s3_bucket,
                            )
                        except Exception as e:
                            raise Exception(f"Row processing failed: {e}") from e

                    if not go_to_next_page(page):
                        break
            finally:
                # Nested so that the browser is still closed below even if
                # closing the context raises.
                context.close()
        finally:
            browser.close()