mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
read address list from file and find matching rows in ecmk datatable
This commit is contained in:
parent
aa5cff4d19
commit
b121413b22
2 changed files with 137 additions and 22 deletions
|
|
@ -1,7 +1,16 @@
|
|||
import os
|
||||
from enum import Enum
|
||||
from typing import Any, Mapping
|
||||
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
|
||||
from typing import Any, List, Mapping
|
||||
from openpyxl import load_workbook
|
||||
from playwright.sync_api import (
|
||||
Locator,
|
||||
sync_playwright,
|
||||
TimeoutError as PlaywrightTimeoutError,
|
||||
)
|
||||
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class file_download_button_types(Enum):
|
||||
|
|
@ -12,10 +21,67 @@ class file_download_button_types(Enum):
|
|||
SAP_WORK_SHEET = 15
|
||||
|
||||
|
||||
def extract_ids_from_spreadsheet(
    filepath: str,
    sheet_name: str = "Southern RA-Lite Programme 3103",
    id_header: str = "id",
) -> List[str]:
    """Return the non-empty values of a worksheet's ID column as strings.

    The first row of the worksheet is treated as the header row. The ID
    column is the first column whose header equals ``id_header`` after
    trimming whitespace and ignoring case. Every later row contributes
    its value from that column, converted with ``str()`` and stripped;
    empty cells and whitespace-only values are skipped.

    Args:
        filepath: Path of the workbook to open.
        sheet_name: Name of the worksheet to read. Defaults to the sheet
            this function originally had hard-coded.
        id_header: Header label of the ID column (case-insensitive).

    Returns:
        The IDs in row order. Duplicates are preserved.

    Raises:
        KeyError: If the workbook has no worksheet called ``sheet_name``
            (raised by the workbook lookup itself).
        ValueError: If no header cell in the first row matches
            ``id_header``.
    """
    wb = load_workbook(filepath, data_only=True)
    ws = wb[sheet_name]

    wanted_header = id_header.strip().lower()

    # Read the header row once instead of addressing each cell by index.
    header_values = next(
        ws.iter_rows(min_row=1, max_row=1, values_only=True),
        (),
    )

    id_col_index = None
    for col, header in enumerate(header_values, start=1):
        if header is not None and str(header).strip().lower() == wanted_header:
            id_col_index = col
            break

    if id_col_index is None:
        raise ValueError("ID column not found in spreadsheet")

    ids: List[str] = []

    # Restrict iteration to the ID column so each row yields a 1-tuple.
    data_rows = ws.iter_rows(
        min_row=2,
        min_col=id_col_index,
        max_col=id_col_index,
        values_only=True,
    )
    for (cell_value,) in data_rows:
        if cell_value is None:
            continue

        id_str = str(cell_value).strip()
        if id_str:
            ids.append(id_str)

    return ids
|
||||
|
||||
|
||||
def build_property_id(address: str, postcode: str) -> str:
    """Build a property ID from an address and a postcode.

    The ID is the first whitespace-delimited token of ``address``
    followed by ``postcode`` with all whitespace removed and converted
    to upper case.

    Example:
        '9 Random Close', 'AB1 2YZ' → '9AB12YZ'

    Args:
        address: Address line; its first token is used as the number.
        postcode: Postcode in any spacing or letter case.

    Returns:
        The concatenated ID. If ``address`` is empty or whitespace-only,
        the ID consists of the cleaned postcode alone.
    """
    # split() without arguments ignores leading whitespace and treats
    # tabs / non-breaking spaces as separators, unlike split(" ").
    tokens = address.split()
    number = tokens[0] if tokens else ""

    # Remove every kind of whitespace, not only the ASCII space.
    postcode_clean = "".join(postcode.split()).upper()

    return f"{number}{postcode_clean}"
|
||||
|
||||
|
||||
def download_report():
|
||||
username = ""
|
||||
password = ""
|
||||
|
||||
property_list_file = (
|
||||
"hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
|
||||
)
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
||||
filepath = os.path.join(
|
||||
BASE_DIR,
|
||||
property_list_file,
|
||||
)
|
||||
property_ids: List[str] = extract_ids_from_spreadsheet(filepath)
|
||||
|
||||
matching_properties: List[str] = []
|
||||
|
||||
with sync_playwright() as p:
|
||||
browser = p.chromium.launch(headless=True)
|
||||
|
||||
|
|
@ -23,10 +89,9 @@ def download_report():
|
|||
page = context.new_page()
|
||||
|
||||
try:
|
||||
# 1. Go to site
|
||||
# Log into ECMK with playwright
|
||||
page.goto("https://assessorhub.net/", timeout=30000)
|
||||
|
||||
# 2. Login (UPDATE selectors if needed)
|
||||
username_input = page.locator("#Username")
|
||||
password_input = page.locator("#Password")
|
||||
|
||||
|
|
@ -36,40 +101,90 @@ def download_report():
|
|||
password_input.wait_for(state="visible", timeout=10000)
|
||||
password_input.fill(password)
|
||||
|
||||
# 3. Submit login
|
||||
with page.expect_navigation(timeout=15000):
|
||||
page.click("button[type='submit']")
|
||||
|
||||
# 4. Verify login succeeded
|
||||
if "login" in page.url.lower():
|
||||
raise Exception("Login failed")
|
||||
|
||||
print("Login successful:", page.url)
|
||||
|
||||
page.goto("https://assessorhub.net/Companies/Assessments", timeout=30000)
|
||||
page.wait_for_selector("#assessmentDatatable tbody tr", timeout=20000)
|
||||
|
||||
while True:
|
||||
rows = page.locator("#assessmentDatatable tbody tr")
|
||||
row_count = rows.count()
|
||||
|
||||
logger.info(f"Processing {row_count} rows on current page")
|
||||
|
||||
for i in range(row_count):
|
||||
row = rows.nth(i)
|
||||
|
||||
try:
|
||||
cells = row.locator("td")
|
||||
|
||||
address = cells.nth(5).inner_text().strip()
|
||||
postcode = cells.nth(7).inner_text().strip()
|
||||
first_name = cells.nth(1).inner_text().strip()
|
||||
last_name = cells.nth(2).inner_text().strip()
|
||||
status = cells.nth(9).inner_text().strip()
|
||||
|
||||
if first_name == "Oliver" and last_name == "Stephens":
|
||||
continue
|
||||
|
||||
if status != "Submitted (not Lodged)":
|
||||
continue
|
||||
|
||||
property_id = build_property_id(address, postcode)
|
||||
|
||||
if property_id not in property_ids:
|
||||
continue
|
||||
|
||||
logger.info(f"MATCH FOUND: {property_id}")
|
||||
matching_properties.append(property_id)
|
||||
|
||||
except PlaywrightTimeoutError as e:
|
||||
raise Exception(f"Timeout occurred: {str(e)}")
|
||||
|
||||
next_button: Locator = page.locator("#assessmentDatatable_next a")
|
||||
class_attr = next_button.get_attribute("class") or ""
|
||||
|
||||
if "disabled" in class_attr:
|
||||
logger.info("No more pages")
|
||||
break
|
||||
|
||||
# first_row_text = rows.first.inner_text()
|
||||
|
||||
next_button.scroll_into_view_if_needed()
|
||||
next_button.click()
|
||||
|
||||
page.wait_for_timeout(2000)
|
||||
|
||||
# 5. Navigate to the assessment detail page
|
||||
page.goto(
|
||||
"https://assessorhub.net/Assessments/Assessments/Detail/1bd9fd74-08f6-4fc1-b2f7-3a13a8f9084d?returnUrl=/Companies/Assessments",
|
||||
timeout=30000,
|
||||
)
|
||||
# page.goto(
|
||||
# "https://assessorhub.net/Assessments/Assessments/Detail/1bd9fd74-08f6-4fc1-b2f7-3a13a8f9084d?returnUrl=/Companies/Assessments",
|
||||
# timeout=30000,
|
||||
# )
|
||||
|
||||
# 6. Locate the correct download button
|
||||
button = page.locator("a.download-report-btn[data-report-type='11']")
|
||||
# # 6. Locate the correct download button
|
||||
# button = page.locator("a.download-report-btn[data-report-type='11']")
|
||||
|
||||
button.wait_for(state="visible", timeout=10000)
|
||||
# button.wait_for(state="visible", timeout=10000)
|
||||
|
||||
# 7. Click and capture the download
|
||||
with page.expect_download(timeout=30000) as download_info:
|
||||
button.click()
|
||||
# # 7. Click and capture the download
|
||||
# with page.expect_download(timeout=30000) as download_info:
|
||||
# button.click()
|
||||
|
||||
download = download_info.value
|
||||
# download = download_info.value
|
||||
|
||||
# 8. Save file locally
|
||||
filename = download.suggested_filename
|
||||
save_path = os.path.join(os.getcwd(), filename)
|
||||
# # 8. Save file locally
|
||||
# filename = download.suggested_filename
|
||||
# save_path = os.path.join(os.getcwd(), filename)
|
||||
|
||||
download.save_as(save_path)
|
||||
# download.save_as(save_path)
|
||||
|
||||
print(f"Downloaded file saved to: {save_path}")
|
||||
# print(f"Downloaded file saved to: {save_path}")
|
||||
|
||||
except PlaywrightTimeoutError as e:
|
||||
raise Exception(f"Timeout occurred: {str(e)}")
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Add table
Reference in a new issue