mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
skip file if already processed according to db
This commit is contained in:
parent
849a272974
commit
15f1fde16a
5 changed files with 71 additions and 4 deletions
25
backend/app/db/functions/uploaded_files_functions.py
Normal file
25
backend/app/db/functions/uploaded_files_functions.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from typing import Optional
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from backend.app.db.connection import db_read_session
|
||||
from backend.app.db.models.uploaded_file import (
|
||||
FileSourceEnum,
|
||||
FileTypeEnum,
|
||||
UploadedFile,
|
||||
)
|
||||
|
||||
|
||||
def get_uploaded_file_by_listing_type_and_source(
    hubspot_listing_id: int,
    file_type: FileTypeEnum,
    file_source: FileSourceEnum,
) -> Optional[UploadedFile]:
    """Look up an uploaded-file record by listing, file type and file source.

    Args:
        hubspot_listing_id: Compared against ``UploadedFile.hubspot_listing_id``.
        file_type: Compared against ``UploadedFile.file_type``.
        file_source: Compared against ``UploadedFile.file_source``.

    Returns:
        The first record matching all three filters, or ``None`` when no
        record matches.
    """
    statement = select(UploadedFile).where(
        UploadedFile.hubspot_listing_id == hubspot_listing_id,
        UploadedFile.file_type == file_type,
        UploadedFile.file_source == file_source,
    )

    with db_read_session() as session:
        # ``first()`` instead of ``one_or_none()``: nothing visible here
        # guarantees that the (listing, type, source) combination is unique,
        # and ``one_or_none()`` raises MultipleResultsFound on duplicates.
        # Callers that only need to know whether a record exists should not
        # crash because the same file was recorded more than once.
        # NOTE(review): the result is handed back after the session context
        # exits; confirm its attributes are still readable if a caller needs
        # more than a truthiness check.
        return session.exec(statement).first()
|
||||
|
|
@ -14,6 +14,8 @@ class FileTypeEnum(enum.Enum):
|
|||
PAR_PHOTO_PACK = "par_photo_pack"
|
||||
PAS_2023_PROPERTY = "pas_2023_property"
|
||||
PAS_2023_OCCUPANCY = "pas_2023_occupancy"
|
||||
ECMK_SITE_NOTE = "ecmk_site_note"
|
||||
ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
|
||||
|
||||
|
||||
class FileSourceEnum(enum.Enum):
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
,daniel,daniel-Dell-15-DC15250,07.04.2026 11:47,/home/daniel/snap/onlyoffice-desktopeditors/1067/.local/share/onlyoffice;
|
||||
|
|
@ -8,6 +8,10 @@ from playwright.sync_api import (
|
|||
BrowserContext,
|
||||
)
|
||||
|
||||
from backend.app.db.functions.uploaded_files_functions import (
|
||||
get_uploaded_file_by_listing_type_and_source,
|
||||
)
|
||||
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
|
||||
from backend.ecmk_fetcher.address_list import (
|
||||
PropertyRow,
|
||||
extract_addresses_from_spreadsheet,
|
||||
|
|
@ -20,7 +24,11 @@ from backend.ecmk_fetcher.browser import (
|
|||
go_to_next_page,
|
||||
login,
|
||||
)
|
||||
from backend.ecmk_fetcher.reports import REPORT_TYPES, build_property_id
|
||||
from backend.ecmk_fetcher.reports import (
|
||||
REPORT_TYPES,
|
||||
build_property_id,
|
||||
map_report_type_to_db_file_type,
|
||||
)
|
||||
from backend.ecmk_fetcher.sharepoint import upload_file_to_sharepoint
|
||||
from utils.logger import setup_logger
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
|
|
@ -48,6 +56,8 @@ def run_job() -> None:
|
|||
|
||||
sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
|
||||
|
||||
# s3_bucket: str = "retrofit-energy-assessments-dev"
|
||||
|
||||
with sync_playwright() as p:
|
||||
browser: Browser = p.chromium.launch(headless=True)
|
||||
context: BrowserContext = browser.new_context()
|
||||
|
|
@ -92,12 +102,29 @@ def run_job() -> None:
|
|||
|
||||
sharepoint_address: str = property_row.address
|
||||
|
||||
# Check whether files have already been processed before continuing with this property
|
||||
# hubspot_listing_id: str = property_row.listing_id
|
||||
|
||||
go_to_assessment_details(page, row)
|
||||
|
||||
for report_type in REPORT_TYPES:
|
||||
hubspot_listing_id: str = property_row.listing_id
|
||||
try:
|
||||
db_file_type: FileTypeEnum = (
|
||||
map_report_type_to_db_file_type(report_type)
|
||||
)
|
||||
|
||||
except ValueError:
|
||||
logger.error(
|
||||
f"Unknown report type {report_type}, skipping file"
|
||||
)
|
||||
continue
|
||||
|
||||
if get_uploaded_file_by_listing_type_and_source(
|
||||
hubspot_listing_id=int(hubspot_listing_id),
|
||||
file_type=db_file_type,
|
||||
file_source=FileSourceEnum.ECMK,
|
||||
):
|
||||
logger.debug("File already uploaded to s3, skipping")
|
||||
continue
|
||||
|
||||
file_path: str | None = download_with_retry(
|
||||
page, report_type
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
from enum import Enum
|
||||
|
||||
from backend.app.db.models.uploaded_file import FileTypeEnum
|
||||
|
||||
|
||||
class FileDownloadButtonType(Enum):
|
||||
ASSESSOR_HUB_SITENOTE_REPORT = 11
|
||||
|
|
@ -15,6 +17,16 @@ REPORT_TYPES = [
|
|||
]
|
||||
|
||||
|
||||
def map_report_type_to_db_file_type(report_type: int) -> FileTypeEnum:
    """Translate a download-button report type into the matching DB file type.

    Args:
        report_type: Numeric report type, compared against the ``.value`` of
            the known ``FileDownloadButtonType`` members.

    Returns:
        ``FileTypeEnum.ECMK_SITE_NOTE`` for the assessor-hub site-note report,
        ``FileTypeEnum.ECMK_RD_SAP_SITE_NOTE`` for the site-note report.

    Raises:
        ValueError: If ``report_type`` equals neither known button value.
    """
    # Guard clauses are checked in the same order as the original cases.
    if report_type == FileDownloadButtonType.ASSESSOR_HUB_SITENOTE_REPORT.value:
        return FileTypeEnum.ECMK_SITE_NOTE

    if report_type == FileDownloadButtonType.SITENOTE_REPORT.value:
        return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE

    raise ValueError("Unknown report type")
|
||||
|
||||
|
||||
def build_report_selector(report_type: int) -> str:
    """Build the selector string for the download link of ``report_type``.

    Args:
        report_type: Value placed in the ``data-report-type`` attribute filter.

    Returns:
        A selector of the form
        ``a.download-report-btn[data-report-type='<report_type>']``.
    """
    attribute_filter = f"[data-report-type='{report_type}']"
    return "a.download-report-btn" + attribute_filter
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue