mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
read address list from file and find matching rows in ecmk datatable
This commit is contained in:
parent
aa5cff4d19
commit
b121413b22
2 changed files with 137 additions and 22 deletions
|
|
@ -1,7 +1,16 @@
|
||||||
import os
|
import os
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Mapping
|
from typing import Any, List, Mapping
|
||||||
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
|
from openpyxl import load_workbook
|
||||||
|
from playwright.sync_api import (
|
||||||
|
Locator,
|
||||||
|
sync_playwright,
|
||||||
|
TimeoutError as PlaywrightTimeoutError,
|
||||||
|
)
|
||||||
|
|
||||||
|
from utils.logger import setup_logger
|
||||||
|
|
||||||
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
class file_download_button_types(Enum):
|
class file_download_button_types(Enum):
|
||||||
|
|
@ -12,10 +21,67 @@ class file_download_button_types(Enum):
|
||||||
SAP_WORK_SHEET = 15
|
SAP_WORK_SHEET = 15
|
||||||
|
|
||||||
|
|
||||||
|
def extract_ids_from_spreadsheet(
    filepath: str,
    sheet_name: str = "Southern RA-Lite Programme 3103",
    id_header: str = "id",
) -> List[str]:
    """
    Read the non-empty values of the ID column from a worksheet.

    The ID column is the first column whose header cell (row 1) equals
    ``id_header`` after stripping whitespace, compared case-insensitively.
    Values are read from row 2 downwards, converted with ``str()`` and
    stripped; cells that are ``None`` or blank after stripping are skipped.

    Args:
        filepath: Path of the workbook passed to ``load_workbook``.
        sheet_name: Name of the worksheet to read. Defaults to the sheet
            name this function originally had hard-coded.
        id_header: Header text identifying the ID column. Defaults to "id".

    Returns:
        The IDs as strings, in row order.

    Raises:
        ValueError: If no header cell in row 1 matches ``id_header``.
    """
    # data_only=True makes formula cells return their cached values instead
    # of the formula text.
    wb = load_workbook(filepath, data_only=True)
    ws = wb[sheet_name]

    wanted_header = id_header.strip().lower()

    # Only the first row is needed to locate the column.
    header_values = next(
        ws.iter_rows(min_row=1, max_row=1, values_only=True),
        (),
    )
    id_col_index = next(
        (
            col
            for col, value in enumerate(header_values, start=1)
            if value and str(value).strip().lower() == wanted_header
        ),
        None,
    )

    if id_col_index is None:
        raise ValueError("ID column not found in spreadsheet")

    ids: List[str] = []

    # Restricting min_col/max_col to the ID column yields 1-tuples, so only
    # the cells that are actually needed get visited.
    for (cell_value,) in ws.iter_rows(
        min_row=2,
        min_col=id_col_index,
        max_col=id_col_index,
        values_only=True,
    ):
        if cell_value is None:
            continue

        id_str = str(cell_value).strip()

        if id_str:
            ids.append(id_str)

    return ids
|
||||||
|
|
||||||
|
|
||||||
|
def build_property_id(address: str, postcode: str) -> str:
    """
    Build a property ID from the first token of an address and a postcode.

    The first whitespace-delimited token of ``address`` (expected to be the
    house number) is concatenated with ``postcode`` after every whitespace
    character has been removed from the postcode and it has been upper-cased.

    Example:
    '9 Random Close', 'AB1 2YZ' → '9AB12YZ'

    Args:
        address: Address text whose first token is used as the number.
        postcode: Postcode text; internal and surrounding whitespace is
            dropped.

    Returns:
        The concatenated ID. If ``address`` is empty or only whitespace the
        number part is empty and only the cleaned postcode is returned.
    """
    # split() with no separator ignores leading and repeated whitespace and
    # also splits on tabs / non-breaking spaces, so "  9 Random Close" still
    # yields "9" instead of an empty first token.
    tokens = address.split()
    number = tokens[0] if tokens else ""

    # Remove every kind of whitespace, not just the plain space character.
    postcode_clean = "".join(postcode.split()).upper()

    return f"{number}{postcode_clean}"
|
||||||
|
|
||||||
|
|
||||||
def download_report():
|
def download_report():
|
||||||
username = ""
|
username = ""
|
||||||
password = ""
|
password = ""
|
||||||
|
|
||||||
|
property_list_file = (
|
||||||
|
"hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
|
||||||
|
)
|
||||||
|
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
||||||
|
filepath = os.path.join(
|
||||||
|
BASE_DIR,
|
||||||
|
property_list_file,
|
||||||
|
)
|
||||||
|
property_ids: List[str] = extract_ids_from_spreadsheet(filepath)
|
||||||
|
|
||||||
|
matching_properties: List[str] = []
|
||||||
|
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p:
|
||||||
browser = p.chromium.launch(headless=True)
|
browser = p.chromium.launch(headless=True)
|
||||||
|
|
||||||
|
|
@ -23,10 +89,9 @@ def download_report():
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. Go to site
|
# Log into ECMK with playwright
|
||||||
page.goto("https://assessorhub.net/", timeout=30000)
|
page.goto("https://assessorhub.net/", timeout=30000)
|
||||||
|
|
||||||
# 2. Login (UPDATE selectors if needed)
|
|
||||||
username_input = page.locator("#Username")
|
username_input = page.locator("#Username")
|
||||||
password_input = page.locator("#Password")
|
password_input = page.locator("#Password")
|
||||||
|
|
||||||
|
|
@ -36,40 +101,90 @@ def download_report():
|
||||||
password_input.wait_for(state="visible", timeout=10000)
|
password_input.wait_for(state="visible", timeout=10000)
|
||||||
password_input.fill(password)
|
password_input.fill(password)
|
||||||
|
|
||||||
# 3. Submit login
|
|
||||||
with page.expect_navigation(timeout=15000):
|
with page.expect_navigation(timeout=15000):
|
||||||
page.click("button[type='submit']")
|
page.click("button[type='submit']")
|
||||||
|
|
||||||
# 4. Verify login succeeded
|
|
||||||
if "login" in page.url.lower():
|
if "login" in page.url.lower():
|
||||||
raise Exception("Login failed")
|
raise Exception("Login failed")
|
||||||
|
|
||||||
print("Login successful:", page.url)
|
print("Login successful:", page.url)
|
||||||
|
|
||||||
|
page.goto("https://assessorhub.net/Companies/Assessments", timeout=30000)
|
||||||
|
page.wait_for_selector("#assessmentDatatable tbody tr", timeout=20000)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
rows = page.locator("#assessmentDatatable tbody tr")
|
||||||
|
row_count = rows.count()
|
||||||
|
|
||||||
|
logger.info(f"Processing {row_count} rows on current page")
|
||||||
|
|
||||||
|
for i in range(row_count):
|
||||||
|
row = rows.nth(i)
|
||||||
|
|
||||||
|
try:
|
||||||
|
cells = row.locator("td")
|
||||||
|
|
||||||
|
address = cells.nth(5).inner_text().strip()
|
||||||
|
postcode = cells.nth(7).inner_text().strip()
|
||||||
|
first_name = cells.nth(1).inner_text().strip()
|
||||||
|
last_name = cells.nth(2).inner_text().strip()
|
||||||
|
status = cells.nth(9).inner_text().strip()
|
||||||
|
|
||||||
|
if first_name == "Oliver" and last_name == "Stephens":
|
||||||
|
continue
|
||||||
|
|
||||||
|
if status != "Submitted (not Lodged)":
|
||||||
|
continue
|
||||||
|
|
||||||
|
property_id = build_property_id(address, postcode)
|
||||||
|
|
||||||
|
if property_id not in property_ids:
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info(f"MATCH FOUND: {property_id}")
|
||||||
|
matching_properties.append(property_id)
|
||||||
|
|
||||||
|
except PlaywrightTimeoutError as e:
|
||||||
|
raise Exception(f"Timeout occurred: {str(e)}")
|
||||||
|
|
||||||
|
next_button: Locator = page.locator("#assessmentDatatable_next a")
|
||||||
|
class_attr = next_button.get_attribute("class") or ""
|
||||||
|
|
||||||
|
if "disabled" in class_attr:
|
||||||
|
logger.info("No more pages")
|
||||||
|
break
|
||||||
|
|
||||||
|
# first_row_text = rows.first.inner_text()
|
||||||
|
|
||||||
|
next_button.scroll_into_view_if_needed()
|
||||||
|
next_button.click()
|
||||||
|
|
||||||
|
page.wait_for_timeout(2000)
|
||||||
|
|
||||||
# 5. Navigate to the assessment detail page
|
# 5. Navigate to the assessment detail page
|
||||||
page.goto(
|
# page.goto(
|
||||||
"https://assessorhub.net/Assessments/Assessments/Detail/1bd9fd74-08f6-4fc1-b2f7-3a13a8f9084d?returnUrl=/Companies/Assessments",
|
# "https://assessorhub.net/Assessments/Assessments/Detail/1bd9fd74-08f6-4fc1-b2f7-3a13a8f9084d?returnUrl=/Companies/Assessments",
|
||||||
timeout=30000,
|
# timeout=30000,
|
||||||
)
|
# )
|
||||||
|
|
||||||
# 6. Locate the correct download button
|
# # 6. Locate the correct download button
|
||||||
button = page.locator("a.download-report-btn[data-report-type='11']")
|
# button = page.locator("a.download-report-btn[data-report-type='11']")
|
||||||
|
|
||||||
button.wait_for(state="visible", timeout=10000)
|
# button.wait_for(state="visible", timeout=10000)
|
||||||
|
|
||||||
# 7. Click and capture the download
|
# # 7. Click and capture the download
|
||||||
with page.expect_download(timeout=30000) as download_info:
|
# with page.expect_download(timeout=30000) as download_info:
|
||||||
button.click()
|
# button.click()
|
||||||
|
|
||||||
download = download_info.value
|
# download = download_info.value
|
||||||
|
|
||||||
# 8. Save file locally
|
# # 8. Save file locally
|
||||||
filename = download.suggested_filename
|
# filename = download.suggested_filename
|
||||||
save_path = os.path.join(os.getcwd(), filename)
|
# save_path = os.path.join(os.getcwd(), filename)
|
||||||
|
|
||||||
download.save_as(save_path)
|
# download.save_as(save_path)
|
||||||
|
|
||||||
print(f"Downloaded file saved to: {save_path}")
|
# print(f"Downloaded file saved to: {save_path}")
|
||||||
|
|
||||||
except PlaywrightTimeoutError as e:
|
except PlaywrightTimeoutError as e:
|
||||||
raise Exception(f"Timeout occurred: {str(e)}")
|
raise Exception(f"Timeout occurred: {str(e)}")
|
||||||
|
|
|
||||||
Binary file not shown.
Loading…
Add table
Reference in a new issue