mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Refactor Pashub service so that site notes parsing and upload are separate from the saving of files
This commit is contained in:
parent
bbd653f83c
commit
d522fcdb94
1 changed files with 55 additions and 24 deletions
|
|
@ -1,16 +1,21 @@
|
|||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import List, Optional, Tuple, cast
|
||||
from typing import List, NamedTuple, Optional, cast
|
||||
|
||||
from backend.app.db.connection import db_session
|
||||
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum, UploadedFile
|
||||
from backend.app.db.models.uploaded_file import (
|
||||
FileSourceEnum,
|
||||
FileTypeEnum,
|
||||
UploadedFile,
|
||||
)
|
||||
from backend.documents_parser.db_writer import save_epc_property_data
|
||||
from backend.documents_parser.parser import parse_site_notes_pdf
|
||||
from backend.pashub_fetcher.core_files import infer_file_type
|
||||
from backend.pashub_fetcher.pashub_client import PashubClient
|
||||
from backend.pashub_fetcher.pashub_to_ara_trigger_request import PashubToAraTriggerRequest
|
||||
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
|
||||
PashubToAraTriggerRequest,
|
||||
)
|
||||
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import upload_file_to_s3
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
|
|
@ -18,6 +23,12 @@ from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
|||
logger = setup_logger()
|
||||
|
||||
|
||||
class _FileUploadRecord(NamedTuple):
|
||||
file_path: str
|
||||
file_type: Optional[str]
|
||||
uploaded_file_id: int
|
||||
|
||||
|
||||
class PashubService:
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -32,7 +43,9 @@ class PashubService:
|
|||
def run(self, request: PashubToAraTriggerRequest) -> List[str]:
|
||||
job_id = request.pashub_job_id
|
||||
|
||||
uprn: Optional[str] = request.uprn or self._pashub_client.get_uprn_by_job_id(job_id)
|
||||
uprn: Optional[str] = request.uprn or self._pashub_client.get_uprn_by_job_id(
|
||||
job_id
|
||||
)
|
||||
hubspot_deal_id: Optional[str] = request.hubspot_deal_id
|
||||
|
||||
if uprn:
|
||||
|
|
@ -40,11 +53,16 @@ class PashubService:
|
|||
else:
|
||||
logger.info(f"No UPRN found for job {job_id}")
|
||||
|
||||
job_files: List[str] = self._pashub_client.get_core_evidence_files_by_job_id(job_id)
|
||||
job_files: List[str] = self._pashub_client.get_core_evidence_files_by_job_id(
|
||||
job_id
|
||||
)
|
||||
|
||||
if uprn or hubspot_deal_id:
|
||||
logger.info("Uploading files to s3")
|
||||
self._upload_to_s3_and_update_db(job_files, uprn, hubspot_deal_id)
|
||||
upload_records = self._upload_to_s3_and_update_db(
|
||||
job_files, uprn, hubspot_deal_id
|
||||
)
|
||||
self._save_site_notes(upload_records)
|
||||
|
||||
# SharePoint upload disabled: pashub sharepoint_link is inconsistent
|
||||
# (points to property or project unpredictably)
|
||||
|
|
@ -64,9 +82,9 @@ class PashubService:
|
|||
job_files: List[str],
|
||||
uprn: Optional[str],
|
||||
hubspot_deal_id: Optional[str],
|
||||
) -> None:
|
||||
) -> List[_FileUploadRecord]:
|
||||
if not uprn and not hubspot_deal_id:
|
||||
return
|
||||
return []
|
||||
|
||||
base_path = (
|
||||
f"documents/uprn/{uprn}"
|
||||
|
|
@ -74,8 +92,8 @@ class PashubService:
|
|||
else f"documents/hubspot_deal_id/{hubspot_deal_id}"
|
||||
)
|
||||
|
||||
file_paths: List[str] = []
|
||||
uploaded_files: List[UploadedFile] = []
|
||||
site_notes_pairs: List[Tuple[UploadedFile, EpcPropertyData]] = []
|
||||
|
||||
for file_path in job_files:
|
||||
filename = os.path.basename(file_path)
|
||||
|
|
@ -92,27 +110,40 @@ class PashubService:
|
|||
file_source=FileSourceEnum.PAS_HUB.value,
|
||||
file_type=infer_file_type(filename),
|
||||
)
|
||||
file_paths.append(file_path)
|
||||
uploaded_files.append(uploaded_file)
|
||||
|
||||
file_type: Optional[str] = cast(Optional[str], uploaded_file.file_type)
|
||||
if file_type is not None and FileTypeEnum(file_type) == FileTypeEnum.RD_SAP_SITE_NOTE:
|
||||
try:
|
||||
site_notes_pairs.append(
|
||||
(uploaded_file, parse_site_notes_pdf(file_path))
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(f"Failed to parse site notes {file_path}", exc_info=True)
|
||||
|
||||
with db_session() as session:
|
||||
session.add_all(uploaded_files)
|
||||
session.flush()
|
||||
|
||||
for uploaded_file, epc_data in site_notes_pairs:
|
||||
save_epc_property_data(
|
||||
session, epc_data, uploaded_file_id=cast(int, uploaded_file.id)
|
||||
upload_records = [
|
||||
_FileUploadRecord(
|
||||
file_path=fp,
|
||||
file_type=cast(Optional[str], uf.file_type),
|
||||
uploaded_file_id=cast(int, uf.id),
|
||||
)
|
||||
for fp, uf in zip(file_paths, uploaded_files)
|
||||
]
|
||||
|
||||
session.commit()
|
||||
return upload_records
|
||||
|
||||
def _save_site_notes(self, upload_records: List[_FileUploadRecord]) -> None:
    """Parse uploaded site-notes files and store the resulting EPC data.

    Records whose ``file_type`` is ``None`` or is not
    ``FileTypeEnum.RD_SAP_SITE_NOTE`` are skipped. Every remaining file is
    parsed with ``parse_site_notes_pdf`` and saved through
    ``save_epc_property_data`` in its own database session, linked to the
    record's ``uploaded_file_id``.

    The step is best-effort: any exception raised while parsing or saving a
    single file is logged as a warning (with traceback) and processing
    continues with the next record.

    Args:
        upload_records: Records describing the files that were uploaded.
    """
    for record in upload_records:
        if (
            record.file_type is None
            or FileTypeEnum(record.file_type) != FileTypeEnum.RD_SAP_SITE_NOTE
        ):
            continue
        try:
            epc_data = parse_site_notes_pdf(record.file_path)
            with db_session() as session:
                save_epc_property_data(
                    session, epc_data, uploaded_file_id=record.uploaded_file_id
                )
                # Commit explicitly, matching the upload step, so the saved
                # EPC data is persisted before the session is released.
                # NOTE(review): redundant if db_session() already commits on
                # exit -- confirm against its implementation.
                session.commit()
        except Exception:
            logger.warning(
                f"Failed to parse site notes {record.file_path}", exc_info=True
            )
|
||||
|
||||
def _upload_to_sharepoint(
|
||||
self,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue