add debugging

This commit is contained in:
Daniel Roth 2026-04-02 08:29:09 +00:00
parent f3ebe122f8
commit a886911de4

View file

@ -7,6 +7,7 @@ from openpyxl import load_workbook
from playwright.sync_api import (
Locator,
Page,
Response,
sync_playwright,
TimeoutError as PlaywrightTimeoutError,
)
@ -29,6 +30,20 @@ class file_download_button_types(Enum):
SAP_WORK_SHEET = 15
def attach_debug_listeners(page: Page) -> None:
    """Attach a ``response`` listener to *page* that logs notable responses.

    Two independent checks are applied to every response:

    * if the URL contains ``"download"`` or ``"report"``, the status and URL
      are logged at INFO level;
    * if the status code is 400 or higher, the status and URL are logged at
      ERROR level.

    A response that satisfies both checks is logged twice.
    """

    def _log_response(resp: Response) -> None:
        target: str = resp.url
        code: int = resp.status

        # Substring match on the raw URL (case-sensitive).
        mentions_report = any(
            keyword in target for keyword in ("download", "report")
        )
        if mentions_report:
            logger.info(f"[RESPONSE] {code} {target}")

        if code >= 400:
            logger.error(f"[ERROR RESPONSE] {code} {target}")

    page.on("response", _log_response)
def extract_addresses_from_spreadsheet(filepath: str) -> Dict[str, str]:
wb = load_workbook(filepath, data_only=True)
ws = wb["Southern RA-Lite Programme 3103"]
@ -147,6 +162,12 @@ def go_to_assessment_details(page: Page, row: Locator) -> None:
with page.expect_navigation():
account_link.click()
page.wait_for_load_state("networkidle")
page.wait_for_selector("a.download-report-btn", timeout=10000)
logger.info("Assessment details page fully loaded")
def go_to_next_page(page: Page) -> bool:
next_button: Locator = page.locator("#assessmentDatatable_next a")
@ -168,21 +189,60 @@ def build_report_selector(report_type: int) -> str:
return f"a.download-report-btn[data-report-type='{report_type}']"
def download_report_by_selector(page: Page, selector: str) -> Optional[str]:
    """Click the element matched by *selector* and save the download it starts.

    The element is located, awaited until visible (10 s), checked for being
    enabled, scrolled into view and then clicked while waiting up to 15 s for
    a download event. The file is saved into the current working directory
    under the name suggested by the download.

    Args:
        page: Playwright page currently showing the download buttons.
        selector: CSS selector of the download link/button to click.

    Returns:
        The path the file was saved to, or ``None`` when the element is not
        enabled or a Playwright timeout occurs anywhere in the sequence.
    """
    try:
        element: Locator = page.locator(selector)
        element.wait_for(state="visible", timeout=10000)

        if not element.is_enabled():
            logger.warning(f"Element not enabled: {selector}")
            return None

        element.scroll_into_view_if_needed()
        # Brief pause between scrolling and clicking
        # (presumably lets the page settle -- confirm whether still needed).
        page.wait_for_timeout(300)

        logger.info(f"Attempting download via selector: {selector}")
        logger.info(f"Current URL: {page.url}")

        with page.expect_download(timeout=15000) as download_info:
            element.click()

        download = download_info.value
        filename: str = download.suggested_filename
        save_path: str = os.path.join(os.getcwd(), filename)
        download.save_as(save_path)

        logger.info(f"Downloaded: {filename}")
        return save_path

    except PlaywrightTimeoutError:
        logger.error(f"Download NOT triggered for selector: {selector}")
        logger.error(f"Current URL at failure: {page.url}")

        # Best-effort diagnostics: capturing the page content must never mask
        # the timeout itself, but a failure to capture is worth recording.
        try:
            content_snippet = page.content()[:1000]
            logger.error(f"Page snippet: {content_snippet}")
        except Exception as exc:
            logger.warning(f"Could not capture page snippet: {exc}")

        return None
def download_with_retry(
    page: Page,
    selector: str,
    attempts: int = 3,
    delay_ms: int = 1500,
) -> Optional[str]:
    """Call ``download_report_by_selector`` until it yields a file path.

    Args:
        page: Playwright page passed through to the download helper.
        selector: CSS selector of the download element.
        attempts: Maximum number of download attempts (default 3).
        delay_ms: Pause between consecutive attempts, in milliseconds
            (default 1500).

    Returns:
        The saved file path from the first successful attempt, or ``None``
        when every attempt fails.
    """
    for attempt in range(1, attempts + 1):
        file_path = download_report_by_selector(page, selector)
        if file_path:
            return file_path

        # Only announce a retry and wait when another attempt will follow;
        # after the last failure there is nothing left to wait for.
        if attempt < attempts:
            logger.warning(f"Retry {attempt} for {selector}")
            page.wait_for_timeout(delay_ms)

    logger.error(f"Giving up on {selector} after {attempts} attempts")
    return None
def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None:
@ -244,6 +304,7 @@ def download_report() -> None:
context = browser.new_context()
page = context.new_page()
attach_debug_listeners(page)
try:
login(page, username, password)
@ -267,7 +328,7 @@ def download_report() -> None:
last_name: str = cells.nth(2).inner_text().strip()
address: str = cells.nth(5).inner_text().strip()
postcode: str = cells.nth(7).inner_text().strip()
uprn: str = cells.nth(8).inner_text().strip()
# uprn: str = cells.nth(8).inner_text().strip()
status: str = cells.nth(9).inner_text().strip()
if first_name == "Oliver" and last_name == "Stephens":
@ -296,16 +357,24 @@ def download_report() -> None:
for report_type in report_types:
selector: str = build_report_selector(report_type)
file_path: str = download_report_by_selector(page, selector)
file_path: Optional[str] = download_with_retry(
page, selector
)
if not file_path:
continue
try:
sharepoint_client.upload_file(
file_path=file_path,
sharepoint_path=f"{sharepoint_base_path}/{sharepoint_address}/1. Retrofit Assessment/A. Assessment",
file_name=os.path.basename(file_path),
)
logger.info(
f"Successfully uploaded file {os.path.basename(file_path)} to sharepoint"
)
# TODO: could s3 load happen for all files at once to reduce db roundtrips?
if uprn:
upload_job_to_s3_and_update_db([file_path], uprn)
# if uprn:
# upload_job_to_s3_and_update_db([file_path], uprn)
finally:
if os.path.exists(file_path):
os.remove(file_path)