diff --git a/backend/ecmk_fetcher/handler/handler.py b/backend/ecmk_fetcher/handler/handler.py
index 39842a36..7f26fb69 100644
--- a/backend/ecmk_fetcher/handler/handler.py
+++ b/backend/ecmk_fetcher/handler/handler.py
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Mapping, Optional
 from openpyxl import load_workbook
 from playwright.sync_api import (
     Locator,
+    Page,
     sync_playwright,
     TimeoutError as PlaywrightTimeoutError,
 )
@@ -109,18 +110,115 @@ def build_property_id(address: str, postcode: str) -> str:
     return f"{number}{postcode_clean}"
 
 
-def download_report():
-    username = ""
-    password = ""
+def login(page: Page, username: str, password: str) -> None:
+    """Log in through the form at https://assessorhub.net/.
+
+    Fills the ``#Username`` and ``#Password`` inputs, submits the form and
+    waits for the navigation that the submit click triggers.
+
+    Raises:
+        Exception: If the URL after the navigation contains "login"
+            (case-insensitive).
+    """
+    page.goto("https://assessorhub.net/", timeout=30000)
 
-    property_list_file = (
+    username_input: Locator = page.locator("#Username")
+    password_input: Locator = page.locator("#Password")
+
+    username_input.wait_for(state="visible", timeout=10000)
+    username_input.fill(username)
+
+    password_input.wait_for(state="visible", timeout=10000)
+    password_input.fill(password)
+
+    with page.expect_navigation(timeout=15000):
+        page.click("button[type='submit']")
+
+    if "login" in page.url.lower():
+        raise Exception("Login failed")
+
+    logger.info("Login successful")
+
+
+def go_to_assessments(page: Page) -> None:
+    """Open the assessments list and wait for a row of the assessment table."""
+    page.goto("https://assessorhub.net/Companies/Assessments", timeout=30000)
+    page.wait_for_selector("#assessmentDatatable tbody tr", timeout=20000)
+
+
+def go_to_assessment_details(page: Page, row: Locator) -> None:
+    """Click the link in the first cell of ``row`` and wait for the navigation."""
+    # Scope the lookup to the first cell: row.locator("a") would match every
+    # link in the row, and click() on a locator that resolves to more than one
+    # element raises a Playwright strictness error.
+    account_link: Locator = row.locator("td").nth(0).locator("a")
+    with page.expect_navigation():
+        account_link.click()
+
+
+def go_to_next_page(page: Page) -> bool:
+    """Advance the assessments table by one page.
+
+    Returns:
+        False when the "next" link's class contains "disabled" (nothing is
+        clicked), True after the link has been clicked.
+    """
+    next_button: Locator = page.locator("#assessmentDatatable_next a")
+
+    # `or ""` collapses a missing class attribute to an empty string, so the
+    # value is always a str here (not Optional[str]).
+    class_attr: str = next_button.get_attribute("class") or ""
+
+    if "disabled" in class_attr:
+        logger.info("No more pages")
+        return False
+
+    next_button.scroll_into_view_if_needed()
+    next_button.click()
+
+    # Fixed pause after the click; nothing here waits for the rows to change.
+    page.wait_for_timeout(2000)
+    return True
+
+
+def build_report_selector(report_type: int) -> str:
+    """Return the CSS selector of the download button for ``report_type``."""
+    return f"a.download-report-btn[data-report-type='{report_type}']"
+
+
+def download_report_by_selector(page: Page, selector: str) -> str:
+    """Click ``selector``, save the download and return the saved path.
+
+    The file is written to the current working directory under the filename
+    suggested by the download.
+    """
+    page.wait_for_selector(selector, timeout=10000)
+
+    with page.expect_download() as download_info:
+        page.click(selector)
+
+    download = download_info.value
+    filename: str = download.suggested_filename
+
+    save_path: str = os.path.join(os.getcwd(), filename)
+    download.save_as(save_path)
+
+    logger.info(f"Downloaded: {filename}")
+
+    return save_path
+
+
+def download_report() -> None:
+    username: str = ""
+    password: str = ""
+
+    property_list_file: str = (
         "hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
     )
-    BASE_DIR = os.path.dirname(os.path.dirname(__file__))
-    filepath = os.path.join(
-        BASE_DIR,
-        property_list_file,
-    )
+
+    BASE_DIR: str = os.path.dirname(os.path.dirname(__file__))
+    filepath: str = os.path.join(BASE_DIR, property_list_file)
+
     property_id_to_address_map: Dict[str, str] = extract_addresses_from_spreadsheet(
         filepath
     )
@@ -135,46 +233,28 @@ def download_report():
         page = context.new_page()
 
         try:
-            # Log into ECMK with playwright
-            page.goto("https://assessorhub.net/", timeout=30000)
-
-            username_input = page.locator("#Username")
-            password_input = page.locator("#Password")
-
-            username_input.wait_for(state="visible", timeout=10000)
-            username_input.fill(username)
-
-            password_input.wait_for(state="visible", timeout=10000)
-            password_input.fill(password)
-
-            with page.expect_navigation(timeout=15000):
-                page.click("button[type='submit']")
-
-            if "login" in page.url.lower():
-                raise Exception("Login failed")
-
+            login(page, username, password)
             print("Login successful:", page.url)
 
-            page.goto("https://assessorhub.net/Companies/Assessments", timeout=30000)
-            page.wait_for_selector("#assessmentDatatable tbody tr", timeout=20000)
+            go_to_assessments(page)
 
             while True:
-                rows = page.locator("#assessmentDatatable tbody tr")
-                row_count = rows.count()
+                rows: Locator = page.locator("#assessmentDatatable tbody tr")
+                row_count: int = rows.count()
 
                 logger.info(f"Processing {row_count} rows on current page")
 
                 for i in range(row_count):
-                    row = rows.nth(i)
+                    row: Locator = rows.nth(i)
 
                     try:
-                        cells = row.locator("td")
+                        cells: Locator = row.locator("td")
 
-                        address = cells.nth(5).inner_text().strip()
-                        postcode = cells.nth(7).inner_text().strip()
-                        first_name = cells.nth(1).inner_text().strip()
-                        last_name = cells.nth(2).inner_text().strip()
-                        status = cells.nth(9).inner_text().strip()
+                        address: str = cells.nth(5).inner_text().strip()
+                        postcode: str = cells.nth(7).inner_text().strip()
+                        first_name: str = cells.nth(1).inner_text().strip()
+                        last_name: str = cells.nth(2).inner_text().strip()
+                        status: str = cells.nth(9).inner_text().strip()
 
                         if first_name == "Oliver" and last_name == "Stephens":
                             continue
@@ -182,63 +262,28 @@ def download_report():
                         if status != "Submitted (not Lodged)":
                             continue
 
-                        property_id = build_property_id(address, postcode)
+                        property_id: str = build_property_id(address, postcode)
 
                         if property_id not in property_ids:
                             continue
 
+                        logger.info(f"MATCH FOUND: {property_id}")
                         matching_properties.append(property_id)
 
-                        try:
-                            sharepoint_address: str = property_id_to_address_map[
-                                property_id
-                            ]
-                        except Exception:
-                            logger.error(
-                                f"Unable to find sharepoint address for property ID {property_id}"
-                            )
-                            continue
+                        sharepoint_address: str = property_id_to_address_map[
+                            property_id
+                        ]
+                        go_to_assessment_details(page, row)
 
-                        # Go to assessment details page and download files
-                        account_link = cells.nth(0).locator("a")
-                        with page.expect_navigation():
-                            account_link.click()
+                        report_types: List[int] = [
+                            file_download_button_types.ASSESSOR_HUB_SITENOTE_REPORT.value,
+                            file_download_button_types.SITENOTE_REPORT.value,
+                        ]
 
-                        assessment_hub_sitenote_selector = f"a.download-report-btn[data-report-type='{file_download_button_types.ASSESSOR_HUB_SITENOTE_REPORT.value}']"
-
-                        page.wait_for_selector(
-                            assessment_hub_sitenote_selector, timeout=10000
-                        )
-
-                        with page.expect_download() as download_info:
-                            page.click(assessment_hub_sitenote_selector)
-
-                        download = download_info.value
-
-                        filename = download.suggested_filename
-                        save_path = os.path.join(os.getcwd(), filename)
-
-                        download.save_as(save_path)
-
-                        logger.info(f"Downloaded: {filename}")
-
-                        sitenote_report_selector = f"a.download-report-btn[data-report-type='{file_download_button_types.SITENOTE_REPORT.value}']"
-
-                        page.wait_for_selector(sitenote_report_selector, timeout=10000)
-
-                        with page.expect_download() as download_info:
-                            page.click(sitenote_report_selector)
-
-                        download = download_info.value
-
-                        filename = download.suggested_filename
-                        save_path = os.path.join(os.getcwd(), filename)
-
-                        download.save_as(save_path)
-
-                        logger.info(f"Downloaded: {filename}")
-
-                        # stick in sharepoint
+                        for report_type in report_types:
+                            selector: str = build_report_selector(report_type)
+                            download_report_by_selector(page, selector)
+                        # TODO: stick in sharepoint
 
                         page.go_back()
                         page.wait_for_selector(
@@ -248,20 +293,9 @@ def download_report():
                     except PlaywrightTimeoutError as e:
                         raise Exception(f"Timeout occurred: {str(e)}")
 
-                next_button: Locator = page.locator("#assessmentDatatable_next a")
-                class_attr = next_button.get_attribute("class") or ""
-
-                if "disabled" in class_attr:
-                    logger.info("No more pages")
+                if not go_to_next_page(page):
                     break
 
-                # first_row_text = rows.first.inner_text()
-
-                next_button.scroll_into_view_if_needed()
-                next_button.click()
-
-                page.wait_for_timeout(2000)
-
         except PlaywrightTimeoutError as e:
             raise Exception(f"Timeout occurred: {str(e)}")
 