def get_uploaded_file_by_listing_type_and_source(
    hubspot_listing_id: int,
    file_type: FileTypeEnum,
    file_source: FileSourceEnum,
) -> Optional[UploadedFile]:
    """Look up an uploaded file by listing id, file type and file source.

    Args:
        hubspot_listing_id: Value matched against
            ``UploadedFile.hubspot_listing_id``.
        file_type: Value matched against ``UploadedFile.file_type``.
        file_source: Value matched against ``UploadedFile.file_source``.

    Returns:
        A matching row, or ``None`` when no row matches. When several rows
        match, the first one the database returns is used; the query applies
        no ordering, so which row that is remains unspecified.
    """
    with db_read_session() as session:
        statement = select(UploadedFile).where(
            UploadedFile.hubspot_listing_id == hubspot_listing_id,
            UploadedFile.file_type == file_type,
            UploadedFile.file_source == file_source,
        )

        # ``first()`` instead of ``one_or_none()``: the latter raises
        # ``MultipleResultsFound`` as soon as two rows share the same
        # listing/type/source, which would abort callers that only need to
        # know whether such a file exists.
        # NOTE(review): ``select`` comes from ``sqlalchemy`` here; if
        # ``session`` is a SQLModel session, ``exec`` may hand back ``Row``
        # objects rather than ``UploadedFile`` instances -- confirm.
        return session.exec(statement).first()
a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py index 4c841a19..dc52c342 100644 --- a/backend/ecmk_fetcher/processor.py +++ b/backend/ecmk_fetcher/processor.py @@ -8,6 +8,10 @@ from playwright.sync_api import ( BrowserContext, ) +from backend.app.db.functions.uploaded_files_functions import ( + get_uploaded_file_by_listing_type_and_source, +) +from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum from backend.ecmk_fetcher.address_list import ( PropertyRow, extract_addresses_from_spreadsheet, @@ -20,7 +24,11 @@ from backend.ecmk_fetcher.browser import ( go_to_next_page, login, ) -from backend.ecmk_fetcher.reports import REPORT_TYPES, build_property_id +from backend.ecmk_fetcher.reports import ( + REPORT_TYPES, + build_property_id, + map_report_type_to_db_file_type, +) from backend.ecmk_fetcher.sharepoint import upload_file_to_sharepoint from utils.logger import setup_logger from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient @@ -48,6 +56,8 @@ def run_job() -> None: sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments" + # s3_bucket: str = "retrofit-energy-assessments-dev" + with sync_playwright() as p: browser: Browser = p.chromium.launch(headless=True) context: BrowserContext = browser.new_context() @@ -92,12 +102,29 @@ def run_job() -> None: sharepoint_address: str = property_row.address - # Check whether files have already been processed before continuing with this property - # hubspot_listing_id: str = property_row.listing_id - go_to_assessment_details(page, row) for report_type in REPORT_TYPES: + hubspot_listing_id: str = property_row.listing_id + try: + db_file_type: FileTypeEnum = ( + map_report_type_to_db_file_type(report_type) + ) + + except ValueError: + logger.error( + f"Unknown report type {report_type}, skipping file" + ) + continue + + if get_uploaded_file_by_listing_type_and_source( + hubspot_listing_id=int(hubspot_listing_id), + file_type=db_file_type, 
def map_report_type_to_db_file_type(report_type: int) -> FileTypeEnum:
    """Translate a report-type code into its ``FileTypeEnum`` member.

    Args:
        report_type: Numeric report type, compared against the values of
            ``FileDownloadButtonType`` members.

    Returns:
        ``FileTypeEnum.ECMK_SITE_NOTE`` for
        ``FileDownloadButtonType.ASSESSOR_HUB_SITENOTE_REPORT`` and
        ``FileTypeEnum.ECMK_RD_SAP_SITE_NOTE`` for
        ``FileDownloadButtonType.SITENOTE_REPORT``.

    Raises:
        ValueError: If ``report_type`` equals neither of those values.
    """
    if report_type == FileDownloadButtonType.ASSESSOR_HUB_SITENOTE_REPORT.value:
        return FileTypeEnum.ECMK_SITE_NOTE

    if report_type == FileDownloadButtonType.SITENOTE_REPORT.value:
        return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE

    # Anything else has no database file type associated with it.
    raise ValueError("Unknown report type")