Model/backend/ecmk_fetcher/processor.py
2026-04-13 11:35:02 +00:00

181 lines
6.5 KiB
Python

import os
from typing import Dict
from playwright.sync_api import (
sync_playwright,
Locator,
Page,
Browser,
BrowserContext,
)
from backend.app.db.functions.uploaded_files_functions import (
get_uploaded_file_by_listing_type_and_source,
)
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
from backend.ecmk_fetcher.address_list import (
PropertyRow,
extract_addresses_from_spreadsheet,
)
from backend.ecmk_fetcher.browser import (
attach_debug_listeners,
download_with_retry,
go_to_assessment_details,
go_to_assessments,
go_to_next_page,
login,
)
from backend.ecmk_fetcher.reports import (
REPORT_TYPES,
build_property_id,
map_report_type_to_db_file_type,
)
from backend.ecmk_fetcher.upload import (
upload_file_to_s3_and_update_db,
upload_file_to_sharepoint,
)
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
logger = setup_logger()
def run_job() -> None:
username: str = "joseph.westwood@domna.homes" # TODO: get from github secrets
password: str = "DomnaTest123!"
property_list_file: str = (
"hubspot-crm-exports-southern-ra-lite-programme-3103-2026-03-31-2.xlsx"
)
BASE_DIR: str = os.path.dirname(__file__)
filepath: str = os.path.join(BASE_DIR, property_list_file)
property_map: Dict[str, PropertyRow] = extract_addresses_from_spreadsheet(filepath)
sharepoint_client: DomnaSharepointClient = DomnaSharepointClient(
sharepoint_location=DomnaSites.PRIVATE_PAY
)
sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
s3_bucket: str = "retrofit-energy-assessments-dev"
with sync_playwright() as p:
browser: Browser = p.chromium.launch(headless=True)
context: BrowserContext = browser.new_context()
page: Page = context.new_page()
attach_debug_listeners(page)
try:
login(page, username, password)
go_to_assessments(page)
while True:
rows: Locator = page.locator("#assessmentDatatable tbody tr")
row_count: int = rows.count()
for i in range(row_count):
row: Locator = rows.nth(i)
try:
cells: Locator = row.locator("td")
first_name: str = cells.nth(1).inner_text().strip()
last_name: str = cells.nth(2).inner_text().strip()
address: str = cells.nth(5).inner_text().strip()
postcode: str = cells.nth(7).inner_text().strip()
status: str = cells.nth(9).inner_text().strip()
if first_name == "Oliver" and last_name == "Stephens":
continue
if status != "Submitted (not Lodged)":
continue
property_id: str = build_property_id(address, postcode)
property_row: PropertyRow | None = property_map.get(property_id)
if not property_row:
continue
logger.info(f"Match found for property {address}")
sharepoint_address: str = property_row.address
go_to_assessment_details(page, row)
for report_type in REPORT_TYPES:
hubspot_listing_id: str = property_row.listing_id
try:
db_file_type: FileTypeEnum = (
map_report_type_to_db_file_type(report_type)
)
except ValueError:
logger.error(
f"Unknown report type {report_type}, skipping file"
)
continue
if get_uploaded_file_by_listing_type_and_source(
hubspot_listing_id=int(hubspot_listing_id),
file_type=db_file_type,
file_source=FileSourceEnum.ECMK,
):
logger.debug("File already uploaded to s3, skipping")
continue
file_path: str | None = download_with_retry(
page, report_type
)
if not file_path:
continue
logger.info(
f"Successfully downloaded file {os.path.basename(file_path)} from ECMK"
)
try:
upload_file_to_sharepoint(
client=sharepoint_client,
file_path=file_path,
base_path=sharepoint_base_path,
subpath=sharepoint_address,
)
logger.info(
f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
)
# Upload to s3 and update db
upload_file_to_s3_and_update_db(
bucket=s3_bucket,
file_path=file_path,
hubspot_listing_id=hubspot_listing_id,
file_type=db_file_type,
)
except Exception:
raise
finally:
if os.path.exists(file_path):
os.remove(file_path)
page.go_back()
page.wait_for_selector(
"#assessmentDatatable tbody tr", timeout=15000
)
except Exception as e:
raise Exception(f"Row processing failed: {str(e)}") from e
if not go_to_next_page(page):
break
finally:
context.close()
browser.close()