diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 70f9eabe..0035a579 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -76,6 +76,8 @@ on: required: false TF_VAR_social_housing_wave_3_sharepoint_id: required: false + TF_VAR_eco_sharepoint_id: + required: false TF_VAR_pashub_email: required: false TF_VAR_pashub_password: @@ -159,6 +161,7 @@ jobs: TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} + TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }} @@ -210,6 +213,7 @@ jobs: TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} + TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }} diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index fc999bc0..920c9ab0 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -448,6 +448,7 @@ jobs: TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.OSMOSIS_ACD_SHAREPOINT_ID }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.PRIVATE_PAY_SHAREPOINT_ID }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }} + TF_VAR_eco_sharepoint_id: ${{ secrets.ECO_SHAREPOINT_ID }} TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }} TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }} TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }} diff --git a/backend/app/config.py b/backend/app/config.py index f969518d..1dc3daaf 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -82,6 +82,7 @@ class Settings(BaseSettings): OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None + ECO_SHAREPOINT_ID: Optional[str] = None OPENAI_API_KEY: Optional[str] = None # Pas Hub diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index b6a73d5d..e00acbe1 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -21,6 +21,8 @@ class FileTypeEnum(enum.Enum): IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation" MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan" RETROFIT_DESIGN_DOC = "retrofit_design_doc" + MCS_COMPLIANCE_CERTIFICATE = "mcs_compliance_certificate" + OTHER = "other" class FileSourceEnum(enum.Enum): diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index e63511eb..c387e0b8 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -17,6 +17,7 @@ class CoreFiles(Enum): IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation" MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan" RETROFIT_DESIGN_DOC = "Retrofit Design Doc" + MCS_COMPLIANCE_CERTIFICATE = "MCS Compliance Certificate" _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { @@ -32,14 +33,21 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value, + CoreFiles.MCS_COMPLIANCE_CERTIFICATE: FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value, } def get_core_file_type( filename: str, evidence_category: Optional[str] = None ) -> Optional[CoreFiles]: - # Identify retrofit design doc using evidence category as the name is possibly unreliable. + # Identify MCS certificate and design doc using evidence category as the names are possibly unreliable. # We might change to always use evidence category, but needs more investigation + if ( + evidence_category is not None + and evidence_category.lower() == "mcs compliance certificate" + ): + return CoreFiles.MCS_COMPLIANCE_CERTIFICATE + if evidence_category is not None and evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC @@ -56,6 +64,7 @@ def get_core_file_type( CoreFiles.RETROFIT_DESIGN_DOC, CoreFiles.IMPROVEMENT_OPTION_EVALUATION, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN, + CoreFiles.MCS_COMPLIANCE_CERTIFICATE, } for core_file in CoreFiles: @@ -68,8 +77,10 @@ def get_core_file_type( return None -def get_file_type_string(filename: str) -> Optional[str]: - core_file: Optional[CoreFiles] = get_core_file_type(filename) +def get_file_type_string( + filename: str, evidence_category: Optional[str] = None +) -> Optional[str]: + core_file: Optional[CoreFiles] = get_core_file_type(filename, evidence_category) if core_file is None: return None diff --git a/backend/pashub_fetcher/evidence_categories.py b/backend/pashub_fetcher/evidence_categories.py new file mode 100644 index 00000000..4e412364 --- /dev/null +++ b/backend/pashub_fetcher/evidence_categories.py @@ -0,0 +1,63 @@ +EVIDENCE_CATEGORIES = [ + "Advice report", + "Air Tests - BGV", + "Air Tightness Strategy", + "Assessment report", + "Blue Site Notes (PAS Assessment)", + "Building Assessment report", + "Building Condition report", + "Building Regulations Sign-off", + "Claim of compliance PAS2030", + "Claim of compliance PAS2035", + "Commissioning checklist", + "Condition report", + "Contract / Invoice", + "Electrical Certificate", + "Energy report", + "Evidence of submission to CPS", + "Floor Plan", + "Full Property Assessment", + "Gas Appliance Benchmarking Certificate", + "Gas Appliance Commissioning Checklist", + "Gas Inspection Certificate", + "Handover and Commissioning Documents", + "Handover Documents", + "Handover documents for client", + "Heat Demand Calculations", + "Heritage Impact Assessment", + "Improvement option evaluation", + "Installation Guides", + "Insurance guarantee", + "Intended outcomes", + "MCS Compliance Certificate", + "Medium term improvement plan", + "Medium term low carbon plan", + "Mid Photo", + "Mid-Install Inspection", + "Minor Works Electrical Certificate", + "Monitoring and evaluation outcomes", + "Occupancy assessment", + "Other", + "Other commissioning certificates", + "Photo", + "Post Energy Performance Report (EPR)", + "Post installation RdSAP", + "Post Photo", + "Pre Energy Performance Report (EPR)", + "Pre installation RdSAP", + "Pre Photo", + "Pre-Design Building Survey", + "Pre-Installation Building Inspection", + "Product Data sheets", + "Product warranty", + "Property Assessment", + "Qualifications", + "Retrofit design", + "Risk assessment", + "Significance survey", + "Site Note (Green /Blue) and Certificate(s)", + "Ventilation Assessment", + "Ventilation Assessment Checklist", + "Ventilation Report", + "Welsh - Checklist", +] diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 626ce59d..00f0ddea 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -40,10 +40,6 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]: if (not pashub_email) or (not pashub_password): raise ValueError("Pas Hub credentials not provided") - sharepoint_client = DomnaSharepointClient( - sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3 - ) - if coordination_hub_email and coordination_hub_password: _coord_email, _coord_password = ( coordination_hub_email, @@ -57,6 +53,16 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]: payload = PashubToAraTriggerRequest.model_validate(body) logger.debug("Successfully validated request body") + sharepoint_client: Optional[DomnaSharepointClient] = None + if payload.sharepoint_site is not None: + try: + resolved_site = DomnaSites[payload.sharepoint_site] + sharepoint_client = DomnaSharepointClient(sharepoint_location=resolved_site) + except KeyError: + logger.warning( + f"Unrecognised sharepoint_site '{payload.sharepoint_site}'; skipping SharePoint upload" + ) + service = PashubService( pashub_client=get_pashub_client(pashub_email, pashub_password), sharepoint_client=sharepoint_client, diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 79d81838..e10fbec7 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -1,6 +1,6 @@ from collections import defaultdict import os -from typing import Dict, List, Optional +from typing import Dict, List, NamedTuple, Optional from datetime import datetime import requests @@ -13,6 +13,22 @@ from utils.logger import setup_logger logger = setup_logger() +class DownloadedFile(NamedTuple): + file_path: str + evidence_category: Optional[str] + created_utc: datetime + + +class _EvidenceFileGroups(NamedTuple): + core: Dict[CoreFiles, EvidenceFileData] + other: List[EvidenceFileData] + + +class DownloadedFiles(NamedTuple): + core: List[DownloadedFile] + other: List[DownloadedFile] + + class UnauthorizedError(Exception): pass @@ -33,42 +49,60 @@ class PashubClient: ) logger.info("Finished initialising CotalityClient") - def get_core_evidence_files_by_job_id(self, job_id: str) -> List[str]: - logger.info(f"Getting Core Evidence Files for job ID {job_id}") + def get_evidence_files_by_job_id( + self, job_id: str, include_other: bool = False + ) -> DownloadedFiles: + logger.info(f"Getting evidence files for job ID {job_id}") evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id) - logger.info(f"Found {len(evidence_list)} Evidence files to get") + logger.info(f"Found {len(evidence_list)} evidence files") if not evidence_list: - return [] + return DownloadedFiles(core=[], other=[]) - saved_files: List[str] = [] - - core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files( + grouped: _EvidenceFileGroups = self._group_into_core_and_other_files( evidence_list ) - logger.info(f"Number of core files to download is {len(core_files)}") - - for _, evidence in core_files.items(): - evidence_id = evidence.file_id - if not evidence_id: + core_files: List[DownloadedFile] = [] + for _, evidence in grouped.core.items(): + if not evidence.file_id: continue - - logger.info(f"Getting metadata for file {evidence.file_name}") metadata: EvidenceMetadata = self._get_evidence_metadata( - job_id, evidence_id + job_id, evidence.file_id ) - download_url: str = self._build_download_url(metadata, evidence.file_id) - output_dir: str = "/tmp" - - file_name: str = evidence.file_name - file_path: str = os.path.join(output_dir, file_name) - + file_path: str = os.path.join("/tmp", evidence.file_name) self._download_file(download_url, file_path) logger.info("Successfully downloaded file") - saved_files.append(file_path) + core_files.append( + DownloadedFile( + file_path=file_path, + evidence_category=evidence.evidence_category, + created_utc=datetime.fromisoformat(evidence.created_utc), + ) + ) - return saved_files + other_files: List[DownloadedFile] = [] + + if include_other: + for evidence in grouped.other: + if not evidence.file_id: + continue + + metadata = self._get_evidence_metadata(job_id, evidence.file_id) + download_url = self._build_download_url(metadata, evidence.file_id) + file_path = os.path.join("/tmp", evidence.file_name) + + self._download_file(download_url, file_path) + logger.info("Successfully downloaded other file") + other_files.append( + DownloadedFile( + file_path=file_path, + evidence_category=evidence.evidence_category, + created_utc=datetime.fromisoformat(evidence.created_utc), + ) + ) + + return DownloadedFiles(core=core_files, other=other_files) def get_uprn_by_job_id(self, job_id: str) -> Optional[str]: logger.info(f"Getting UPRN for job ID {job_id}") @@ -92,30 +126,32 @@ class PashubClient: ) return None - def _select_latest_core_files( + def _group_into_core_and_other_files( self, files: List[EvidenceFileData], - ) -> Dict[CoreFiles, EvidenceFileData]: + ) -> _EvidenceFileGroups: grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list) + other: List[EvidenceFileData] = [] for file in files: core_type: Optional[CoreFiles] = get_core_file_type( file.file_name, file.evidence_category ) if not core_type: + other.append(file) continue grouped[core_type].append(file) - latest_files: Dict[CoreFiles, EvidenceFileData] = {} + latest_core_files: Dict[CoreFiles, EvidenceFileData] = {} for core_type, group in grouped.items(): if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1: osm_candidates = [f for f in group if "-OSM-" in f.file_name] group = osm_candidates if osm_candidates else group latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc)) - latest_files[core_type] = latest + latest_core_files[core_type] = latest - return latest_files + return _EvidenceFileGroups(core=latest_core_files, other=other) def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]: url = f"{self.base}/jobs/{job_id}/evidence" diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index f7f6ccd9..86a553f0 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -11,11 +11,15 @@ from backend.app.db.models.uploaded_file import ( from backend.documents_parser.db_writer import save_epc_property_data from backend.documents_parser.parser import parse_site_notes_pdf from backend.pashub_fetcher.core_files import get_file_type_string -from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError +from backend.pashub_fetcher.pashub_client import ( + DownloadedFile, + DownloadedFiles, + PashubClient, + UnauthorizedError, +) from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, ) -from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders from datatypes.epc.domain.epc_property_data import EpcPropertyData from utils.logger import setup_logger from utils.s3 import upload_file_to_s3 @@ -34,7 +38,7 @@ class PashubService: def __init__( self, pashub_client: PashubClient, - sharepoint_client: DomnaSharepointClient, + sharepoint_client: Optional[DomnaSharepointClient], s3_bucket: str, coordination_client_factory: Optional[Callable[[], PashubClient]] = None, ) -> None: @@ -75,14 +79,16 @@ class PashubService: logger.info(f"No UPRN found for job {job_id}") try: - job_files: List[str] = active_client.get_core_evidence_files_by_job_id( - job_id + downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id( + job_id, include_other=request.get_other_files ) except UnauthorizedError: if active_client is not self._pashub_client: raise active_client = self._get_coordination_client() - job_files = active_client.get_core_evidence_files_by_job_id(job_id) + downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id( + job_id, include_other=request.get_other_files + ) if uprn or hubspot_deal_id: logger.info("Uploading files to s3") @@ -92,29 +98,47 @@ class PashubService: else FileSourceEnum.COORDINATION_HUB ) upload_records = self._upload_to_s3_and_update_db( - job_files, uprn, hubspot_deal_id, file_source + downloaded.core, uprn, hubspot_deal_id, file_source ) self._save_site_notes(upload_records) - # SharePoint upload disabled: pashub sharepoint_link is inconsistent - # (points to property or project unpredictably) - # if request.sharepoint_link: - # self._upload_to_sharepoint(request.sharepoint_link, job_files) + if downloaded.other: + self._upload_to_s3_and_update_db( + downloaded.other, + uprn, + hubspot_deal_id, + file_source, + default_file_type=FileTypeEnum.OTHER.value, + ) - for file_path in job_files: + if self._sharepoint_client and request.sharepoint_link and request.address: + folder_name = request.address.split("|")[0].strip() + folders = self._sharepoint_client.get_folders_in_path(request.sharepoint_link) + match = next( + (f["name"] for f in folders.get("value", []) if f["name"].lower() == folder_name.lower()), + None, + ) + if match is None: + logger.warning(f"SharePoint folder not found for '{folder_name}' in {request.sharepoint_link}") + else: + property_folder_path = f"{request.sharepoint_link}/{match}" + self._upload_to_sharepoint(property_folder_path, downloaded.core + downloaded.other) + + for df in downloaded.core + downloaded.other: try: - os.remove(file_path) + os.remove(df.file_path) except OSError: - logger.warning(f"Failed to delete temp file {file_path}") + logger.warning(f"Failed to delete temp file {df.file_path}") - return job_files + return [df.file_path for df in downloaded.core + downloaded.other] def _upload_to_s3_and_update_db( self, - job_files: List[str], + job_files: List[DownloadedFile], uprn: Optional[str], hubspot_deal_id: Optional[str], file_source: FileSourceEnum, + default_file_type: Optional[str] = None, ) -> List[_FileUploadRecord]: if not uprn and not hubspot_deal_id: return [] @@ -128,11 +152,11 @@ class PashubService: file_paths: List[str] = [] uploaded_files: List[UploadedFile] = [] - for file_path in job_files: - filename = os.path.basename(file_path) + for df in job_files: + filename = os.path.basename(df.file_path) file_key = f"{base_path}/{filename}" - upload_file_to_s3(file_path, self._s3_bucket, file_key) + upload_file_to_s3(df.file_path, self._s3_bucket, file_key) uploaded_file = UploadedFile( s3_file_bucket=self._s3_bucket, @@ -141,9 +165,9 @@ class PashubService: uprn=int(uprn) if uprn else None, hubspot_deal_id=hubspot_deal_id, file_source=file_source.value, - file_type=get_file_type_string(filename), + file_type=get_file_type_string(filename, df.evidence_category) or default_file_type, ) - file_paths.append(file_path) + file_paths.append(df.file_path) uploaded_files.append(uploaded_file) with db_session() as session: @@ -180,11 +204,13 @@ class PashubService: def _upload_to_sharepoint( self, - sharepoint_link: str, - job_files: List[str], + property_folder_path: str, + files: List[DownloadedFile], ) -> None: - assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}" - - for file_path in job_files: - filename = file_path.split("/")[-1] - self._sharepoint_client.upload_file(file_path, assessment_path, filename) + assert self._sharepoint_client is not None + for df in files: + filename = os.path.basename(df.file_path) + try: + self._sharepoint_client.upload_file(df.file_path, property_folder_path, filename) + except Exception: + logger.warning(f"Failed to upload {filename} to SharePoint", exc_info=True) diff --git a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py index 715a09f8..d747a388 100644 --- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py +++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py @@ -14,6 +14,10 @@ class PashubToAraTriggerRequest(BaseModel): hubspot_listing_id: Optional[int] = None hubspot_deal_id: Optional[str] = None + sharepoint_site: Optional[str] = None + + get_other_files: bool = False + @property def pashub_job_id(self) -> str: match = re.search(r"/jobs/([^/]+)", self.pashub_link) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 3c1d11b8..a2047ece 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -183,3 +183,44 @@ def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present # Assert assert result is None + + +def test_core_file_for_mcs_compliance_certificate_returns_mcs_compliance_certificate() -> None: + # Arrange + filename = "MCS_cert_job123.pdf" + + # Act + result = get_core_file_type( + filename, evidence_category="mcs compliance certificate" + ) + + # Assert + assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE + + +def test_core_file_for_mcs_compliance_certificate_is_case_insensitive() -> None: + # Arrange + filename = "some_cert.pdf" + + # Act + result = get_core_file_type( + filename, evidence_category="MCS Compliance Certificate" + ) + + # Assert + assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE + + +def test_get_file_type_string_with_mcs_evidence_category_returns_mcs_compliance_certificate() -> ( + None +): + # Arrange + filename = "some_cert.pdf" + + # Act + result = get_file_type_string( + filename, evidence_category="MCS Compliance Certificate" + ) + + # Assert + assert result == "mcs_compliance_certificate" diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 34260c73..214a14a6 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -1,9 +1,22 @@ # pyright: reportPrivateUsage=false from typing import Optional +from unittest.mock import patch from backend.pashub_fetcher.core_files import CoreFiles from backend.pashub_fetcher.evidence_file_data import EvidenceFileData -from backend.pashub_fetcher.pashub_client import PashubClient +from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata +from backend.pashub_fetcher.pashub_client import ( + DownloadedFile, + DownloadedFiles, + PashubClient, +) + + +def make_metadata() -> EvidenceMetadata: + return EvidenceMetadata( + container_name="my-container", + blob_uri="https://storage.example.com/blob?sas=token", + ) def make_client() -> PashubClient: @@ -26,11 +39,27 @@ def make_file( # --------------------------------------------------------------------------- -# _select_latest_core_files +# _group_into_core_and_other_files # --------------------------------------------------------------------------- -def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: +def test_group_into_core_and_other_files_classifies_core_and_other_correctly() -> None: + # Arrange + client = make_client() + files = [ + make_file(file_name="SiteNote_001.pdf"), + make_file(file_name="some_unknown_document.pdf"), + ] + + # Act + result = client._group_into_core_and_other_files(files) + + # Assert + assert CoreFiles.SITENOTE in result.core + assert [f.file_name for f in result.other] == ["some_unknown_document.pdf"] + + +def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> None: # Arrange client = make_client() files = [ @@ -42,13 +71,16 @@ def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name + == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + ) -def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: +def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None: # Arrange - the non-OSM file is newer but should lose to the OSM file client = make_client() files = [ @@ -65,13 +97,18 @@ def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name + == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + ) -def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None: +def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> ( + None +): # Arrange client = make_client() files = [ @@ -88,13 +125,62 @@ def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() - ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name + == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" + ) -def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None: +def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="MCS_cert_job123.pdf", + evidence_category="MCS Compliance Certificate", + ), + ] + + # Act + result = client._group_into_core_and_other_files(files) + + # Assert + assert CoreFiles.MCS_COMPLIANCE_CERTIFICATE in result.core + assert result.other == [] + + +def test_group_into_core_and_other_files_picks_most_recent_mcs_cert() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="mcs_cert_old.pdf", + evidence_category="MCS Compliance Certificate", + created_utc="2024-01-01T00:00:00", + ), + make_file( + file_name="mcs_cert_new.pdf", + evidence_category="MCS Compliance Certificate", + created_utc="2024-06-01T00:00:00", + ), + ] + + # Act + result = client._group_into_core_and_other_files(files) + + # Assert + assert ( + result.core[CoreFiles.MCS_COMPLIANCE_CERTIFICATE].file_name + == "mcs_cert_new.pdf" + ) + + +def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> ( + None +): # Arrange client = make_client() files = [ @@ -111,7 +197,84 @@ def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + ) + + +# --------------------------------------------------------------------------- +# get_evidence_files_by_job_id +# --------------------------------------------------------------------------- + + +def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_when_include_other_false() -> ( + None +): + # Arrange + client = make_client() + files = [ + make_file(file_name="SiteNote_001.pdf"), + make_file(file_name="unknown_doc.pdf"), + ] + + # Act + with ( + patch.object(client, "_get_evidence_list", return_value=files), + patch.object(client, "_get_evidence_metadata", return_value=make_metadata()), + patch.object(client, "_download_file"), + ): + result = client.get_evidence_files_by_job_id("job-1", include_other=False) + + # Assert + assert isinstance(result, DownloadedFiles) + assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"] + assert result.other == [] + + +def test_get_evidence_files_by_job_id_core_files_carry_evidence_category() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="MCS_cert.pdf", + evidence_category="MCS Compliance Certificate", + ), + ] + + # Act + with ( + patch.object(client, "_get_evidence_list", return_value=files), + patch.object(client, "_get_evidence_metadata", return_value=make_metadata()), + patch.object(client, "_download_file"), + ): + result = client.get_evidence_files_by_job_id("job-1", include_other=False) + + # Assert + assert len(result.core) == 1 + assert result.core[0].evidence_category == "MCS Compliance Certificate" + + +def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> ( + None +): + # Arrange + client = make_client() + files = [ + make_file(file_name="SiteNote_001.pdf"), + make_file(file_name="unknown_doc.pdf"), + ] + + # Act + with ( + patch.object(client, "_get_evidence_list", return_value=files), + patch.object(client, "_get_evidence_metadata", return_value=make_metadata()), + patch.object(client, "_download_file"), + ): + result = client.get_evidence_files_by_job_id("job-1", include_other=True) + + # Assert + assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"] + assert [df.file_path for df in result.other] == ["/tmp/unknown_doc.pdf"] diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index 1f750117..ccb80ac4 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -1,17 +1,22 @@ import pytest +from datetime import datetime from typing import Any, Callable, Optional from unittest.mock import MagicMock, call, patch -from backend.app.db.models.uploaded_file import FileSourceEnum -from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError +from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum +from backend.pashub_fetcher.pashub_client import ( + DownloadedFile, + DownloadedFiles, + PashubClient, + UnauthorizedError, +) from backend.pashub_fetcher.pashub_service import PashubService from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, ) from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient - FAKE_JOB_LINK = "https://pashub.net/jobs/job-id-123/details" @@ -20,12 +25,16 @@ def make_request( uprn: Optional[str] = None, hubspot_deal_id: Optional[str] = None, sharepoint_link: Optional[str] = None, + get_other_files: bool = False, + address: Optional[str] = None, ) -> PashubToAraTriggerRequest: return PashubToAraTriggerRequest( pashub_link=pashub_link, uprn=uprn, hubspot_deal_id=hubspot_deal_id, sharepoint_link=sharepoint_link, + get_other_files=get_other_files, + address=address, ) @@ -43,6 +52,16 @@ def make_service( ) +_DEFAULT_UTC = datetime(2024, 1, 1) + + +def make_downloaded(core: list[str], other: list[str] = []) -> DownloadedFiles: + return DownloadedFiles( + core=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in core], + other=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in other], + ) + + # --------------------------------------------------------------------------- # run(): returns file paths # --------------------------------------------------------------------------- @@ -51,10 +70,9 @@ def make_service( def test_run_returns_file_paths() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/a.pdf", - "/tmp/b.pdf", - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf", "/tmp/b.pdf"] + ) service = make_service(pashub_client=mock_client) @@ -64,6 +82,30 @@ def test_run_returns_file_paths() -> None: assert result == ["/tmp/a.pdf", "/tmp/b.pdf"] +# --------------------------------------------------------------------------- +# run(): returns core + other file paths when get_other_files=True +# --------------------------------------------------------------------------- + + +def test_run_returns_core_and_other_file_paths() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"], + other=["/tmp/other.pdf"], + ) + + service = make_service(pashub_client=mock_client) + + # Act + with patch("backend.pashub_fetcher.pashub_service.os.remove"): + result = service.run(make_request(get_other_files=True)) + + # Assert + assert result == ["/tmp/core.pdf", "/tmp/other.pdf"] + + # --------------------------------------------------------------------------- # run(): skips upload when neither uprn nor hubspot_deal_id # --------------------------------------------------------------------------- @@ -72,7 +114,9 @@ def test_run_returns_file_paths() -> None: def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) service = make_service(pashub_client=mock_client) @@ -93,10 +137,9 @@ def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None: def test_run_uploads_files_to_s3_using_uprn_path() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf", - "/tmp/Photopack_002.pdf", - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf", "/tmp/Photopack_002.pdf"] + ) service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") @@ -132,9 +175,9 @@ def test_run_uploads_files_to_s3_using_uprn_path() -> None: def test_run_persists_uploaded_file_records_to_db() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"] + ) fake_session = MagicMock() service = make_service(pashub_client=mock_client) @@ -163,9 +206,9 @@ def test_run_persists_uploaded_file_records_to_db() -> None: def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"] + ) service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") @@ -191,9 +234,9 @@ def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None: def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/RdSAP_SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/RdSAP_SiteNote_001.pdf"] + ) fake_epc_data = MagicMock() fake_session = MagicMock() @@ -241,11 +284,15 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: coord_client = MagicMock(spec=PashubClient) coord_client.get_uprn_by_job_id.return_value = "99999" - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -256,20 +303,24 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: assert result == ["/tmp/a.pdf"] coord_client.get_uprn_by_job_id.assert_called_once() - coord_client.get_core_evidence_files_by_job_id.assert_called_once() + coord_client.get_evidence_files_by_job_id.assert_called_once() assert factory.call_count == 1 def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: pas_client = MagicMock(spec=PashubClient) - pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError() + pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -279,7 +330,7 @@ def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: result = service.run(make_request(uprn="12345")) assert result == ["/tmp/a.pdf"] - coord_client.get_core_evidence_files_by_job_id.assert_called_once() + coord_client.get_evidence_files_by_job_id.assert_called_once() pas_client.get_uprn_by_job_id.assert_not_called() @@ -302,24 +353,32 @@ def test_run_raises_unauthorized_when_both_clients_401() -> None: factory = MagicMock(return_value=coord_client) - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with pytest.raises(UnauthorizedError): service.run(make_request()) -def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> None: +def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> ( + None +): pas_client = MagicMock(spec=PashubClient) pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) coord_client.get_uprn_by_job_id.return_value = "99999" - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) fake_session = MagicMock() - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -334,17 +393,23 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value -def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None: +def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> ( + None +): pas_client = MagicMock(spec=PashubClient) - pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError() + pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) fake_session = MagicMock() - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -359,12 +424,231 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing( assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value +# --------------------------------------------------------------------------- +# run(): get_other_files=True → other temp files deleted after run +# --------------------------------------------------------------------------- + + +def test_run_deletes_other_temp_files_when_get_other_files_true() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"], + other=["/tmp/other.pdf"], + ) + + service = make_service(pashub_client=mock_client) + + # Act + with patch("backend.pashub_fetcher.pashub_service.os.remove") as mock_remove: + service.run(make_request(get_other_files=True)) + + # Assert + mock_remove.assert_any_call("/tmp/core.pdf") + mock_remove.assert_any_call("/tmp/other.pdf") + + +# --------------------------------------------------------------------------- +# run(): get_other_files=True → other files uploaded to S3 +# --------------------------------------------------------------------------- + + +def test_run_uploads_other_files_to_s3_when_get_other_files_true() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"], + other=["/tmp/unknown_file.pdf"], + ) + + service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as mock_s3, + patch("backend.pashub_fetcher.pashub_service.db_session"), + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + service.run(make_request(uprn="12345", get_other_files=True)) + + # Assert + mock_s3.assert_any_call( + "/tmp/unknown_file.pdf", + "my-bucket", + "documents/uprn/12345/unknown_file.pdf", + ) + + +# --------------------------------------------------------------------------- +# run(): get_other_files=True → other files persisted with file_type OTHER +# --------------------------------------------------------------------------- + + +def test_run_persists_other_files_with_other_file_type() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=[], + other=["/tmp/unknown_file.pdf"], + ) + + fake_session = MagicMock() + service = make_service(pashub_client=mock_client) + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), + patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db, + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + mock_db.return_value.__enter__.return_value = fake_session + service.run(make_request(uprn="12345", get_other_files=True)) + + # Assert + all_added = [item for c in fake_session.add_all.call_args_list for item in c[0][0]] + assert len(all_added) == 1 + assert all_added[0].file_type == FileTypeEnum.OTHER.value + + +def test_run_persists_mcs_cert_with_mcs_compliance_certificate_file_type() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = DownloadedFiles( + core=[ + DownloadedFile( + "/tmp/MCS_cert.pdf", "MCS Compliance Certificate", datetime(2024, 1, 1) + ) + ], + other=[], + ) + + fake_session = MagicMock() + service = make_service(pashub_client=mock_client) + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), + patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db, + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + mock_db.return_value.__enter__.return_value = fake_session + service.run(make_request(uprn="12345")) + + # Assert + fake_session.add_all.assert_called_once() + added: list[Any] = fake_session.add_all.call_args[0][0] + assert added[0].file_type == FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value + + +# --------------------------------------------------------------------------- +# run(): SharePoint upload +# --------------------------------------------------------------------------- + + +def test_sharepoint_uploads_all_files_to_property_folder() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"], + other=["/tmp/other.pdf"], + ) + + mock_sharepoint = MagicMock(spec=DomnaSharepointClient) + mock_sharepoint.get_folders_in_path.return_value = { + "value": [{"name": "123 Main St"}] + } + + service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint) + + # Act + with patch("backend.pashub_fetcher.pashub_service.os.remove"): + service.run( + make_request( + sharepoint_link="Retrofit/Properties", + get_other_files=True, + address="123 Main St | some deal", + ) + ) + + # Assert + mock_sharepoint.upload_file.assert_any_call( + "/tmp/core.pdf", "Retrofit/Properties/123 Main St", "core.pdf" + ) + mock_sharepoint.upload_file.assert_any_call( + "/tmp/other.pdf", "Retrofit/Properties/123 Main St", "other.pdf" + ) + + +def test_sharepoint_skips_upload_when_folder_not_found() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"] + ) + + mock_sharepoint = MagicMock(spec=DomnaSharepointClient) + mock_sharepoint.get_folders_in_path.return_value = { + "value": [{"name": "Different Property"}] + } + + service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint) + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.os.remove"), + patch("backend.pashub_fetcher.pashub_service.logger") as mock_logger, + ): + service.run( + make_request( + sharepoint_link="Retrofit/Properties", + address="No Such Property | deal", + ) + ) + + # Assert + mock_sharepoint.upload_file.assert_not_called() + mock_logger.warning.assert_called() + + +def test_sharepoint_skips_upload_when_sharepoint_client_is_none() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"] + ) + + service = PashubService( + pashub_client=mock_client, + sharepoint_client=None, + s3_bucket="test-bucket", + ) + + # Act — should not raise AttributeError on None._sharepoint_client + with patch("backend.pashub_fetcher.pashub_service.os.remove"): + result = service.run( + make_request( + sharepoint_link="Retrofit/Properties", + address="123 Main St | deal", + ) + ) + + # Assert + assert result == ["/tmp/core.pdf"] + + def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/RdSAP_SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/RdSAP_SiteNote_001.pdf"] + ) service = make_service(pashub_client=mock_client) diff --git a/backend/pashub_fetcher/trigger_lambda_from_file.py b/backend/pashub_fetcher/trigger_local_lambda_from_file.py similarity index 82% rename from backend/pashub_fetcher/trigger_lambda_from_file.py rename to backend/pashub_fetcher/trigger_local_lambda_from_file.py index fb9d1cbf..ad3097da 100644 --- a/backend/pashub_fetcher/trigger_lambda_from_file.py +++ b/backend/pashub_fetcher/trigger_local_lambda_from_file.py @@ -10,19 +10,19 @@ from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( ) from backend.pashub_fetcher.handler.handler import handler - if __name__ == "__main__": BASE_DIR = os.path.dirname(os.path.dirname(__file__)) filepath: str = os.path.join( BASE_DIR, "pashub_fetcher", - "The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx", + "local_run_02-06-2026", + "ECO_Approach_Coordination_Design_KN.xlsx", ) wb = load_workbook(filepath, data_only=True) - ws = wb["filtered_2"] + ws = wb["filtered"] - HEADER_ROW = 3 + HEADER_ROW = 1 headers: Dict[str, int] = {} for col in range(1, ws.max_column + 1): @@ -31,7 +31,7 @@ if __name__ == "__main__": headers[value.strip()] = col name_col = headers["Name"] - link_col = headers["PasHub Link"] + link_col = headers["PasHub ID"] hubspot_deal_id_col = headers["HubSpot ID"] trigger_requests: List[PashubToAraTriggerRequest] = [] @@ -50,7 +50,10 @@ if __name__ == "__main__": trigger_requests.append( PashubToAraTriggerRequest( - pashub_link=str(link), hubspot_deal_id=str(hubspot_deal_id) + pashub_link=str(link), + hubspot_deal_id=str(hubspot_deal_id), + address=str(name), + get_other_files=True, ) ) diff --git a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py index f4c03afc..d6736eda 100644 --- a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py +++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py @@ -16,44 +16,52 @@ logger: logging.Logger = logging.getLogger(__name__) DRY_RUN: bool = False -DEAL_ID_FILTER: frozenset[str] = frozenset( - { - "379452094688", - "379466504437", - "379660170452", - "380016925932", - "379848065216", - "379466504434", - "379452094690", - "379965924567", - "380016925923", - "379792072898", - "379654754502", - "379560262861", - "379969670369", - "379248717001", - "379971468493", - "379999888607", - "379606372580", - "379969603797", - "379967743213", - "379263155434", - "379855267025", - "379889899719", - "379071064307", - "379867925741", - } -) +# DEAL_ID_FILTER: frozenset[str] = frozenset( +# { +# "379452094688", +# "379466504437", +# "379660170452", +# "380016925932", +# "379848065216", +# "379466504434", +# "379452094690", +# "379965924567", +# "380016925923", +# "379792072898", +# "379654754502", +# "379560262861", +# "379969670369", +# "379248717001", +# "379971468493", +# "379999888607", +# "379606372580", +# "379969603797", +# "379967743213", +# "379263155434", +# "379855267025", +# "379889899719", +# "379071064307", +# "379867925741", +# } +# ) + +DEAL_ID_FILTER = None EXCEL_PATH: str = os.path.join( os.path.dirname(__file__), - "united-infrastructure-exports-all-deals-2026-05-14.xlsx", + "local_run_02-06-2026/ECO_Approach_Coordination_Design_KN.xlsx", ) +SHAREPOINT_PROPERTIES_FOLDER: str = ( + "Housing Associations/- Client Shared Folders/Abri/Abri Property Folders (Full PAS Info)" +) + +SHAREPOINT_SITE: str = "ECO" + def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: wb = load_workbook(excel_path, data_only=True) - ws = wb.worksheets[0] + ws = wb.worksheets[1] headers: dict[str, int] = {} for col in range(1, ws.max_column + 1): @@ -61,10 +69,10 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: if header_val is not None: headers[str(header_val).strip()] = col - pashub_col: int = headers["PasHub link"] - record_id_col: int = headers["Record ID"] - deal_name_col: int = headers["Deal Name"] - deal_stage_col: int = headers["Deal Stage"] + pashub_col: int = headers["PasHub ID"] + record_id_col: int = headers["HubSpot ID"] + deal_name_col: int = headers["Name"] + deal_stage_col: Optional[int] = headers.get("Deal Stage", None) requests: list[PashubToAraTriggerRequest] = [] @@ -77,7 +85,9 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: record_id_raw = ws.cell(row=row, column=record_id_col).value deal_name_raw = ws.cell(row=row, column=deal_name_col).value - deal_stage_raw = ws.cell(row=row, column=deal_stage_col).value + deal_stage_raw = ( + ws.cell(row=row, column=deal_stage_col).value if deal_stage_col else None + ) hubspot_deal_id: Optional[str] = ( str(record_id_raw) if record_id_raw is not None else None @@ -95,6 +105,8 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: hubspot_deal_id=hubspot_deal_id, address=address, deal_stage=deal_stage, + sharepoint_link=SHAREPOINT_PROPERTIES_FOLDER or None, + sharepoint_site=SHAREPOINT_SITE, ) ) @@ -116,7 +128,7 @@ def main() -> None: for request in trigger_requests: action: str = "DRY RUN" if DRY_RUN else "SENDING" logger.info( - f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link}" + f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link} sharepoint_link={request.sharepoint_link}" ) if not DRY_RUN: diff --git a/deployment/terraform/lambda/pashub_to_ara/main.tf b/deployment/terraform/lambda/pashub_to_ara/main.tf index eba9c874..b5714055 100644 --- a/deployment/terraform/lambda/pashub_to_ara/main.tf +++ b/deployment/terraform/lambda/pashub_to_ara/main.tf @@ -47,6 +47,7 @@ module "lambda" { OSMOSIS_ACD_SHAREPOINT_ID = var.osmosis_acd_sharepoint_id PRIVATE_PAY_SHAREPOINT_ID = var.private_pay_sharepoint_id SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id + ECO_SHAREPOINT_ID = var.eco_sharepoint_id PASHUB_EMAIL = var.pashub_email PASHUB_PASSWORD = var.pashub_password PASHUB_COORDINATION_EMAIL = var.pashub_coordination_email diff --git a/deployment/terraform/lambda/pashub_to_ara/variables.tf b/deployment/terraform/lambda/pashub_to_ara/variables.tf index cdeff256..29b7af70 100644 --- a/deployment/terraform/lambda/pashub_to_ara/variables.tf +++ b/deployment/terraform/lambda/pashub_to_ara/variables.tf @@ -92,6 +92,11 @@ variable "social_housing_wave_3_sharepoint_id" { sensitive = true } +variable "eco_sharepoint_id" { + type = string + sensitive = true +} + variable "pashub_email" { type = string sensitive = true diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 176e9b15..589e526b 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -67,7 +67,9 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}" ) - _trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id) + _trigger_magicplan_fetcher( + sqs_client, hubspot_deal, listing, hubspot_deal_id + ) else: # Deal already in db, check whether anything has changed logger.info( @@ -119,13 +121,18 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}" ) - _trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id) + _trigger_magicplan_fetcher( + sqs_client, hubspot_deal, listing, hubspot_deal_id + ) print("done") def _trigger_magicplan_fetcher( - sqs_client: Any, hubspot_deal: Dict[str, str], listing: Optional[dict[str, str]], hubspot_deal_id: str + sqs_client: Any, + hubspot_deal: Dict[str, str], + listing: Optional[dict[str, str]], + hubspot_deal_id: str, ) -> None: message_body = { "address": hubspot_deal.get("dealname"), @@ -136,9 +143,7 @@ def _trigger_magicplan_fetcher( QueueUrl=get_settings().MAGICPLAN_SQS_URL, MessageBody=json.dumps(message_body), ) - logger.info( - f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}" - ) + logger.info(f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}") def _trigger_pashub_fetcher( @@ -148,7 +153,7 @@ def _trigger_pashub_fetcher( "pashub_link": hubspot_deal["pashub_link"], "address": None, # potentially available from Listing, leave as None for now "hubspot_deal_id": deal_id, - "sharepoint_link": hubspot_deal.get("sharepoint_link", None), + # "sharepoint_link": hubspot_deal.get("sharepoint_link", None), # Don't send sharepoint link for now as they are inconsistent "uprn": hubspot_deal.get("national_uprn", None), "landlord_property_id": hubspot_deal.get("owner_property_id", None), "deal_stage": hubspot_deal.get("deal_stage", None), diff --git a/utils/sharepoint/domna_sites.py b/utils/sharepoint/domna_sites.py index e5efb82c..ce579af6 100644 --- a/utils/sharepoint/domna_sites.py +++ b/utils/sharepoint/domna_sites.py @@ -9,3 +9,4 @@ class DomnaSites(Enum): OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID") PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID") SOCIAL_HOUSING_WAVE_3 = os.getenv("SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID") + ECO = os.getenv("ECO_SHAREPOINT_ID")