mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
add debugging
This commit is contained in:
parent
f3ebe122f8
commit
a886911de4
1 changed files with 83 additions and 14 deletions
|
|
@ -7,6 +7,7 @@ from openpyxl import load_workbook
|
|||
from playwright.sync_api import (
|
||||
Locator,
|
||||
Page,
|
||||
Response,
|
||||
sync_playwright,
|
||||
TimeoutError as PlaywrightTimeoutError,
|
||||
)
|
||||
|
|
@ -29,6 +30,20 @@ class file_download_button_types(Enum):
|
|||
SAP_WORK_SHEET = 15
|
||||
|
||||
|
||||
def attach_debug_listeners(page: Page) -> None:
    """Register a response listener on ``page`` that logs selected traffic.

    Responses whose URL contains ``"download"`` or ``"report"`` are logged at
    INFO level. Responses with an HTTP status of 400 or above are logged at
    ERROR level.

    Args:
        page: Playwright page whose ``"response"`` events should be observed.
    """

    def handle_response(response: Response) -> None:
        """Log one response according to its URL and status code."""
        url: str = response.url
        status: int = response.status

        if "download" in url or "report" in url:
            logger.info(f"[RESPONSE] {status} {url}")

        # NOTE(review): treated as independent of the URL filter above, so a
        # failing download/report response is logged twice — confirm this
        # nesting against the upstream file.
        if status >= 400:
            logger.error(f"[ERROR RESPONSE] {status} {url}")

    page.on("response", handle_response)
|
||||
|
||||
|
||||
def extract_addresses_from_spreadsheet(filepath: str) -> Dict[str, str]:
|
||||
wb = load_workbook(filepath, data_only=True)
|
||||
ws = wb["Southern RA-Lite Programme 3103"]
|
||||
|
|
@ -147,6 +162,12 @@ def go_to_assessment_details(page: Page, row: Locator) -> None:
|
|||
with page.expect_navigation():
|
||||
account_link.click()
|
||||
|
||||
page.wait_for_load_state("networkidle")
|
||||
|
||||
page.wait_for_selector("a.download-report-btn", timeout=10000)
|
||||
|
||||
logger.info("Assessment details page fully loaded")
|
||||
|
||||
|
||||
def go_to_next_page(page: Page) -> bool:
|
||||
next_button: Locator = page.locator("#assessmentDatatable_next a")
|
||||
|
|
@ -168,21 +189,60 @@ def build_report_selector(report_type: int) -> str:
|
|||
return f"a.download-report-btn[data-report-type='{report_type}']"
|
||||
|
||||
|
||||
def _log_download_failure(page: Page, selector: str) -> None:
    """Log diagnostics for a download attempt that hit a Playwright timeout."""
    logger.error(f"Download NOT triggered for selector: {selector}")
    logger.error(f"Current URL at failure: {page.url}")

    try:
        content_snippet = page.content()[:1000]
    except Exception as exc:
        # The snippet is best-effort diagnostics only: never let it mask the
        # original failure, but do record why it could not be captured.
        logger.warning(f"Could not capture page snippet: {exc}")
    else:
        logger.error(f"Page snippet: {content_snippet}")


def download_report_by_selector(page: Page, selector: str) -> Optional[str]:
    """Click the element matching ``selector`` and save the triggered download.

    The file is saved in the current working directory under the filename
    suggested by the download.

    Args:
        page: Playwright page on which the element is located and clicked.
        selector: Selector of the element that triggers the download.

    Returns:
        The path of the saved file, or ``None`` when the element is not
        enabled or when any step raises a Playwright timeout (for example the
        element never becoming visible, or no download starting after the
        click).
    """
    try:
        element: Locator = page.locator(selector)
        element.wait_for(state="visible", timeout=10000)

        if not element.is_enabled():
            logger.warning(f"Element not enabled: {selector}")
            return None

        element.scroll_into_view_if_needed()
        # Brief pause between scrolling and clicking.
        page.wait_for_timeout(300)

        logger.info(f"Attempting download via selector: {selector}")
        logger.info(f"Current URL: {page.url}")

        # The click must happen inside the context manager so the download
        # event it triggers is captured.
        with page.expect_download(timeout=15000) as download_info:
            element.click()

        download = download_info.value
        filename: str = download.suggested_filename

        save_path: str = os.path.join(os.getcwd(), filename)
        download.save_as(save_path)

        logger.info(f"Downloaded: {filename}")

        return save_path

    except PlaywrightTimeoutError:
        _log_download_failure(page, selector)
        return None
|
||||
|
||||
|
||||
def download_with_retry(
    page: Page,
    selector: str,
    max_attempts: int = 3,
    retry_delay_ms: int = 1500,
) -> Optional[str]:
    """Call ``download_report_by_selector`` until it yields a file path.

    Args:
        page: Playwright page passed through to ``download_report_by_selector``.
        selector: Selector passed through to ``download_report_by_selector``.
        max_attempts: Total number of download attempts to make.
        retry_delay_ms: Pause between a failed attempt and the next one, in
            milliseconds.

    Returns:
        The first truthy path returned by ``download_report_by_selector``, or
        ``None`` when every attempt fails.
    """
    for attempt in range(1, max_attempts + 1):
        file_path: Optional[str] = download_report_by_selector(page, selector)

        if file_path:
            return file_path

        # Only announce and wait for a retry when one will actually happen;
        # after the final attempt there is nothing left to wait for.
        if attempt < max_attempts:
            logger.warning(f"Retry {attempt} for {selector}")
            page.wait_for_timeout(retry_delay_ms)

    logger.error(f"Giving up on {selector} after {max_attempts} attempts")
    return None
|
||||
|
||||
|
||||
def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None:
|
||||
|
|
@ -244,6 +304,7 @@ def download_report() -> None:
|
|||
|
||||
context = browser.new_context()
|
||||
page = context.new_page()
|
||||
attach_debug_listeners(page)
|
||||
|
||||
try:
|
||||
login(page, username, password)
|
||||
|
|
@ -267,7 +328,7 @@ def download_report() -> None:
|
|||
last_name: str = cells.nth(2).inner_text().strip()
|
||||
address: str = cells.nth(5).inner_text().strip()
|
||||
postcode: str = cells.nth(7).inner_text().strip()
|
||||
uprn: str = cells.nth(8).inner_text().strip()
|
||||
# uprn: str = cells.nth(8).inner_text().strip()
|
||||
status: str = cells.nth(9).inner_text().strip()
|
||||
|
||||
if first_name == "Oliver" and last_name == "Stephens":
|
||||
|
|
@ -296,16 +357,24 @@ def download_report() -> None:
|
|||
|
||||
for report_type in report_types:
|
||||
selector: str = build_report_selector(report_type)
|
||||
file_path: str = download_report_by_selector(page, selector)
|
||||
file_path: Optional[str] = download_with_retry(
|
||||
page, selector
|
||||
)
|
||||
|
||||
if not file_path:
|
||||
continue
|
||||
try:
|
||||
sharepoint_client.upload_file(
|
||||
file_path=file_path,
|
||||
sharepoint_path=f"{sharepoint_base_path}/{sharepoint_address}/1. Retrofit Assessment/A. Assessment",
|
||||
file_name=os.path.basename(file_path),
|
||||
)
|
||||
logger.info(
|
||||
f"Successfully uploaded file {os.path.basename(file_path)} to sharepoint"
|
||||
)
|
||||
# TODO: could s3 load happen for all files at once to reduce db roundtrips?
|
||||
if uprn:
|
||||
upload_job_to_s3_and_update_db([file_path], uprn)
|
||||
# if uprn:
|
||||
# upload_job_to_s3_and_update_db([file_path], uprn)
|
||||
finally:
|
||||
if os.path.exists(file_path):
|
||||
os.remove(file_path)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue