refactor with helper functions

This commit is contained in:
Daniel Roth 2026-04-01 14:13:46 +00:00
parent 33c4572f48
commit 37a12ffb28

View file

@ -5,6 +5,7 @@ from typing import Any, Dict, List, Mapping, Optional
from openpyxl import load_workbook
from playwright.sync_api import (
Locator,
Page,
sync_playwright,
TimeoutError as PlaywrightTimeoutError,
)
@ -109,18 +110,86 @@ def build_property_id(address: str, postcode: str) -> str:
return f"{number}{postcode_clean}"
def download_report():
username = ""
password = ""
def login(page: Page, username: str, password: str) -> None:
    """Sign in to AssessorHub through its login form.

    Opens the site root, fills the ``#Username`` and ``#Password`` inputs,
    submits the form and waits for the navigation that follows.

    Args:
        page: Playwright page used to drive the browser.
        username: Value typed into the ``#Username`` input.
        password: Value typed into the ``#Password`` input.

    Raises:
        RuntimeError: If the page URL still contains "login" after the form
            has been submitted, which this function treats as a failed login.

    Timeouts raised by the Playwright waits below are not handled here and
    propagate to the caller.
    """
    page.goto("https://assessorhub.net/", timeout=30000)

    username_input: Locator = page.locator("#Username")
    password_input: Locator = page.locator("#Password")

    # Wait for each field to be visible before typing into it.
    username_input.wait_for(state="visible", timeout=10000)
    username_input.fill(username)

    password_input.wait_for(state="visible", timeout=10000)
    password_input.fill(password)

    # Submitting the form triggers a navigation; wait for it to finish.
    with page.expect_navigation(timeout=15000):
        page.click("button[type='submit']")

    # Still on a URL containing "login" means we were not let through.
    if "login" in page.url.lower():
        raise RuntimeError("Login failed")

    logger.info("Login successful")
def go_to_assessments(page: Page) -> None:
    """Open the company assessments list and wait for its table rows.

    Args:
        page: Playwright page used to drive the browser.
    """
    assessments_url = "https://assessorhub.net/Companies/Assessments"
    table_row_selector = "#assessmentDatatable tbody tr"

    page.goto(assessments_url, timeout=30000)
    # Do not return until a row of the assessments table is present.
    page.wait_for_selector(table_row_selector, timeout=20000)
def go_to_assessment_details(page: Page, row: Locator) -> None:
    """Follow a table row's link and wait for the resulting navigation.

    Args:
        page: Playwright page that contains ``row``.
        row: Locator for one row of the assessments table.
    """
    # Pin the lookup to the first anchor in the row. A bare
    # ``row.locator("a")`` matches every link in the row, and a strict
    # ``click()`` raises when more than one element matches.
    account_link: Locator = row.locator("a").first
    with page.expect_navigation():
        account_link.click()
def go_to_next_page(page: Page) -> bool:
    """Click the table's "next" control unless it is marked disabled.

    Args:
        page: Playwright page showing the assessments table.

    Returns:
        True if the "next" link was clicked, False if its ``class``
        attribute contains "disabled" (no further pages).
    """
    pager_link = page.locator("#assessmentDatatable_next a")

    # A missing class attribute is treated the same as an empty one.
    css_classes = pager_link.get_attribute("class")
    if css_classes is None:
        css_classes = ""

    # NOTE(review): this inspects the class of the inner <a>; confirm the
    # site puts "disabled" there rather than on the parent element.
    has_next_page = "disabled" not in css_classes
    if has_next_page:
        pager_link.scroll_into_view_if_needed()
        pager_link.click()
        # Fixed 2 s pause after the click before the caller reads the table.
        page.wait_for_timeout(2000)
        return True

    logger.info("No more pages")
    return False
def build_report_selector(report_type: int) -> str:
    """Return the CSS selector of the download link for ``report_type``.

    Args:
        report_type: Value matched against the link's ``data-report-type``
            attribute.

    Returns:
        A selector of the form
        ``a.download-report-btn[data-report-type='<report_type>']``.
    """
    attribute_filter = f"[data-report-type='{report_type}']"
    return "a.download-report-btn" + attribute_filter
def download_report_by_selector(page: Page, selector: str) -> str:
    """Click a download link and save the file into the working directory.

    Args:
        page: Playwright page that contains the download link.
        selector: CSS selector of the element that starts the download.

    Returns:
        Path the downloaded file was saved to: the current working
        directory joined with the file name suggested for the download.
    """
    page.wait_for_selector(selector, timeout=10000)

    # The click must happen inside the context manager so the download
    # event it triggers is captured.
    with page.expect_download() as download_info:
        page.click(selector)

    download = download_info.value

    # The suggested name originates from the remote side; keep only its
    # final path component so it cannot point outside the working directory.
    filename: str = os.path.basename(download.suggested_filename)
    save_path: str = os.path.join(os.getcwd(), filename)

    download.save_as(save_path)
    logger.info(f"Downloaded: {filename}")

    return save_path
def download_report() -> None:
username: str = ""
password: str = ""
property_list_file: str = (
"hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
)
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
filepath = os.path.join(
BASE_DIR,
property_list_file,
)
BASE_DIR: str = os.path.dirname(os.path.dirname(__file__))
filepath: str = os.path.join(BASE_DIR, property_list_file)
property_id_to_address_map: Dict[str, str] = extract_addresses_from_spreadsheet(
filepath
)
@ -135,46 +204,28 @@ def download_report():
page = context.new_page()
try:
# Log into ECMK with playwright
page.goto("https://assessorhub.net/", timeout=30000)
username_input = page.locator("#Username")
password_input = page.locator("#Password")
username_input.wait_for(state="visible", timeout=10000)
username_input.fill(username)
password_input.wait_for(state="visible", timeout=10000)
password_input.fill(password)
with page.expect_navigation(timeout=15000):
page.click("button[type='submit']")
if "login" in page.url.lower():
raise Exception("Login failed")
login(page, username, password)
print("Login successful:", page.url)
page.goto("https://assessorhub.net/Companies/Assessments", timeout=30000)
page.wait_for_selector("#assessmentDatatable tbody tr", timeout=20000)
go_to_assessments(page)
while True:
rows = page.locator("#assessmentDatatable tbody tr")
row_count = rows.count()
rows: Locator = page.locator("#assessmentDatatable tbody tr")
row_count: int = rows.count()
logger.info(f"Processing {row_count} rows on current page")
for i in range(row_count):
row = rows.nth(i)
row: Locator = rows.nth(i)
try:
cells = row.locator("td")
cells: Locator = row.locator("td")
address = cells.nth(5).inner_text().strip()
postcode = cells.nth(7).inner_text().strip()
first_name = cells.nth(1).inner_text().strip()
last_name = cells.nth(2).inner_text().strip()
status = cells.nth(9).inner_text().strip()
address: str = cells.nth(5).inner_text().strip()
postcode: str = cells.nth(7).inner_text().strip()
first_name: str = cells.nth(1).inner_text().strip()
last_name: str = cells.nth(2).inner_text().strip()
status: str = cells.nth(9).inner_text().strip()
if first_name == "Oliver" and last_name == "Stephens":
continue
@ -182,63 +233,28 @@ def download_report():
if status != "Submitted (not Lodged)":
continue
property_id = build_property_id(address, postcode)
property_id: str = build_property_id(address, postcode)
if property_id not in property_ids:
continue
logger.info(f"MATCH FOUND: {property_id}")
matching_properties.append(property_id)
try:
sharepoint_address: str = property_id_to_address_map[
property_id
]
except Exception:
logger.error(
f"Unable to find sharepoint address for property ID {property_id}"
)
continue
sharepoint_address: str = property_id_to_address_map[
property_id
]
go_to_assessment_details(page, row)
# Go to assessment details page and download files
account_link = cells.nth(0).locator("a")
with page.expect_navigation():
account_link.click()
report_types: List[int] = [
file_download_button_types.ASSESSOR_HUB_SITENOTE_REPORT.value,
file_download_button_types.SITENOTE_REPORT.value,
]
assessment_hub_sitenote_selector = f"a.download-report-btn[data-report-type='{file_download_button_types.ASSESSOR_HUB_SITENOTE_REPORT.value}']"
page.wait_for_selector(
assessment_hub_sitenote_selector, timeout=10000
)
with page.expect_download() as download_info:
page.click(assessment_hub_sitenote_selector)
download = download_info.value
filename = download.suggested_filename
save_path = os.path.join(os.getcwd(), filename)
download.save_as(save_path)
logger.info(f"Downloaded: {filename}")
sitenote_report_selector = f"a.download-report-btn[data-report-type='{file_download_button_types.SITENOTE_REPORT.value}']"
page.wait_for_selector(sitenote_report_selector, timeout=10000)
with page.expect_download() as download_info:
page.click(sitenote_report_selector)
download = download_info.value
filename = download.suggested_filename
save_path = os.path.join(os.getcwd(), filename)
download.save_as(save_path)
logger.info(f"Downloaded: {filename}")
# stick in sharepoint
for report_type in report_types:
selector: str = build_report_selector(report_type)
download_report_by_selector(page, selector)
# TODO: stick in sharepoint
page.go_back()
page.wait_for_selector(
@ -248,20 +264,9 @@ def download_report():
except PlaywrightTimeoutError as e:
raise Exception(f"Timeout occurred: {str(e)}")
next_button: Locator = page.locator("#assessmentDatatable_next a")
class_attr = next_button.get_attribute("class") or ""
if "disabled" in class_attr:
logger.info("No more pages")
if not go_to_next_page(page):
break
# first_row_text = rows.first.inner_text()
next_button.scroll_into_view_if_needed()
next_button.click()
page.wait_for_timeout(2000)
except PlaywrightTimeoutError as e:
raise Exception(f"Timeout occurred: {str(e)}")