mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
126 lines
4.6 KiB
Python
126 lines
4.6 KiB
Python
import os
|
|
from datetime import datetime, timezone
|
|
from typing import List, Optional, Tuple, cast
|
|
|
|
from backend.app.db.connection import db_session
|
|
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum, UploadedFile
|
|
from backend.documents_parser.db_writer import save_epc_property_data
|
|
from backend.documents_parser.parser import parse_site_notes_pdf
|
|
from backend.pashub_fetcher.core_files import infer_file_type
|
|
from backend.pashub_fetcher.pashub_client import PashubClient
|
|
from backend.pashub_fetcher.pashub_to_ara_trigger_request import PashubToAraTriggerRequest
|
|
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
|
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
|
from utils.logger import setup_logger
|
|
from utils.s3 import upload_file_to_s3
|
|
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
|
|
|
# Module-level logger used by PashubService for progress and failure messages.
logger = setup_logger()
|
|
|
|
|
|
class PashubService:
    """Copy a PasHub job's core evidence files to S3 and record them in the database.

    For one trigger request the service resolves the property's UPRN, downloads
    the job's core evidence files through the PasHub client, uploads each file to
    the configured S3 bucket, stores an ``UploadedFile`` row per file and, for
    files classified as RdSAP site notes, parses the PDF and saves the resulting
    EPC property data linked to that row.
    """

    def __init__(
        self,
        pashub_client: PashubClient,
        sharepoint_client: DomnaSharepointClient,
        s3_bucket: str,
    ) -> None:
        """Store the collaborators used by the service.

        Args:
            pashub_client: Client used to look up a job's UPRN and to download its
                core evidence files.
            sharepoint_client: Client used by ``_upload_to_sharepoint``.
            s3_bucket: Name of the S3 bucket the files are uploaded to.
        """
        self._pashub_client = pashub_client
        self._sharepoint_client = sharepoint_client
        self._s3_bucket = s3_bucket

    def run(self, request: PashubToAraTriggerRequest) -> List[str]:
        """Process one PasHub job: download its files, upload them, then clean up.

        The UPRN is taken from the request when present, otherwise it is looked up
        through the PasHub client. Files are uploaded only when a UPRN or a HubSpot
        deal id is available, because one of them is needed to build the S3 key.

        Args:
            request: Trigger request carrying ``pashub_job_id`` and optionally
                ``uprn`` and ``hubspot_deal_id``.

        Returns:
            The local paths returned by the PasHub client. The files at those
            paths have already been removed (best effort) when this method returns.

        Raises:
            Exception: Anything raised by the upload / database step propagates
                to the caller; the downloaded files are still cleaned up first.
        """
        job_id = request.pashub_job_id

        uprn: Optional[str] = request.uprn or self._pashub_client.get_uprn_by_job_id(job_id)
        hubspot_deal_id: Optional[str] = request.hubspot_deal_id

        if uprn:
            logger.info(f"Got UPRN {uprn} for job {job_id}")
        else:
            logger.info(f"No UPRN found for job {job_id}")

        job_files: List[str] = self._pashub_client.get_core_evidence_files_by_job_id(job_id)

        try:
            if uprn or hubspot_deal_id:
                logger.info("Uploading files to s3")
                self._upload_to_s3_and_update_db(job_files, uprn, hubspot_deal_id)
            else:
                # Without either identifier there is no S3 key to upload under;
                # make the skip visible instead of dropping the files silently.
                logger.warning(
                    f"No UPRN or HubSpot deal id for job {job_id}; skipping S3 upload"
                )

            # SharePoint upload disabled: pashub sharepoint_link is inconsistent
            # (points to property or project unpredictably)
            # if request.sharepoint_link:
            #     self._upload_to_sharepoint(request.sharepoint_link, job_files)
        finally:
            # Always remove the downloaded files, including when the upload or
            # the database write fails, so failed jobs do not leak temp files.
            self._remove_temp_files(job_files)

        return job_files

    @staticmethod
    def _remove_temp_files(file_paths: List[str]) -> None:
        """Delete each local file, logging (not raising) when a deletion fails."""
        for file_path in file_paths:
            try:
                os.remove(file_path)
            except OSError:
                logger.warning(f"Failed to delete temp file {file_path}", exc_info=True)

    def _upload_to_s3_and_update_db(
        self,
        job_files: List[str],
        uprn: Optional[str],
        hubspot_deal_id: Optional[str],
    ) -> None:
        """Upload the files to S3 and persist one ``UploadedFile`` row per file.

        Keys are built as ``documents/uprn/<uprn>/<filename>`` when a UPRN is
        given, otherwise ``documents/hubspot_deal_id/<deal id>/<filename>``.
        Files whose inferred type is ``FileTypeEnum.RD_SAP_SITE_NOTE`` are also
        parsed; a parse failure is logged and does not stop the upload. All rows
        and parsed EPC data are written in a single database session.

        Does nothing when ``job_files`` is empty or when neither identifier is
        provided.

        Args:
            job_files: Local paths of the files to upload.
            uprn: Property UPRN as a string of digits, if known.
            hubspot_deal_id: HubSpot deal id, used for the key when no UPRN exists.

        Raises:
            ValueError: If ``uprn`` is set but cannot be converted to ``int``.
                Raised before any file is uploaded.
        """
        if not job_files or not (uprn or hubspot_deal_id):
            return

        base_path = (
            f"documents/uprn/{uprn}"
            if uprn
            else f"documents/hubspot_deal_id/{hubspot_deal_id}"
        )
        # Convert once, before the first upload: a malformed UPRN then fails
        # fast instead of leaving an S3 object behind with no database row.
        uprn_value: Optional[int] = int(uprn) if uprn else None

        uploaded_files: List[UploadedFile] = []
        site_notes_pairs: List[Tuple[UploadedFile, EpcPropertyData]] = []

        for file_path in job_files:
            filename = os.path.basename(file_path)
            file_key = f"{base_path}/{filename}"

            upload_file_to_s3(file_path, self._s3_bucket, file_key)

            uploaded_file = UploadedFile(
                s3_file_bucket=self._s3_bucket,
                s3_file_key=file_key,
                s3_upload_timestamp=datetime.now(timezone.utc),
                uprn=uprn_value,
                hubspot_deal_id=hubspot_deal_id,
                file_source=FileSourceEnum.PAS_HUB.value,
                file_type=infer_file_type(filename),
            )
            uploaded_files.append(uploaded_file)

            file_type: Optional[str] = cast(Optional[str], uploaded_file.file_type)
            if file_type is not None and FileTypeEnum(file_type) == FileTypeEnum.RD_SAP_SITE_NOTE:
                # Parsing is best effort: the file is still recorded as uploaded
                # even when its contents cannot be parsed.
                try:
                    epc_data = parse_site_notes_pdf(file_path)
                except Exception:
                    logger.warning(f"Failed to parse site notes {file_path}", exc_info=True)
                else:
                    site_notes_pairs.append((uploaded_file, epc_data))

        with db_session() as session:
            session.add_all(uploaded_files)
            # Flush so each UploadedFile has its primary key before it is
            # referenced by the EPC property data below.
            session.flush()

            for uploaded_file, epc_data in site_notes_pairs:
                save_epc_property_data(
                    session, epc_data, uploaded_file_id=cast(int, uploaded_file.id)
                )

            session.commit()

    def _upload_to_sharepoint(
        self,
        sharepoint_link: str,
        job_files: List[str],
    ) -> None:
        """Upload every file into the assessment subfolder under ``sharepoint_link``.

        Currently not called from ``run`` (see the note there).

        Args:
            sharepoint_link: Base SharePoint location the subfolder is appended to.
            job_files: Local paths of the files to upload.
        """
        assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}"

        for file_path in job_files:
            # Same filename derivation as the S3 upload path.
            filename = os.path.basename(file_path)
            self._sharepoint_client.upload_file(file_path, assessment_path, filename)
|