From fe482a99076aeaf728375cbaf4f6fd4d82140f4e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 14:09:14 +0000 Subject: [PATCH 01/36] rename local handler trigger script --- ...gger_lambda_from_file.py => trigger_local_lambda_from_file.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename backend/pashub_fetcher/{trigger_lambda_from_file.py => trigger_local_lambda_from_file.py} (100%) diff --git a/backend/pashub_fetcher/trigger_lambda_from_file.py b/backend/pashub_fetcher/trigger_local_lambda_from_file.py similarity index 100% rename from backend/pashub_fetcher/trigger_lambda_from_file.py rename to backend/pashub_fetcher/trigger_local_lambda_from_file.py From 62eea9f005ef220543c220e9839466b82a09de2d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 14:10:25 +0000 Subject: [PATCH 02/36] allow for missing deal stage column when triggering sqs from file --- backend/pashub_fetcher/trigger_pashub_sqs_from_file.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py index f4c03afc..c751171a 100644 --- a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py +++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py @@ -64,7 +64,7 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: pashub_col: int = headers["PasHub link"] record_id_col: int = headers["Record ID"] deal_name_col: int = headers["Deal Name"] - deal_stage_col: int = headers["Deal Stage"] + deal_stage_col: Optional[int] = headers["Deal Stage"] requests: list[PashubToAraTriggerRequest] = [] @@ -77,7 +77,9 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: record_id_raw = ws.cell(row=row, column=record_id_col).value deal_name_raw = ws.cell(row=row, column=deal_name_col).value - deal_stage_raw = ws.cell(row=row, column=deal_stage_col).value + deal_stage_raw = ( + ws.cell(row=row, 
column=deal_stage_col).value if deal_stage_col else None + ) hubspot_deal_id: Optional[str] = ( str(record_id_raw) if record_id_raw is not None else None From 8e0392514fd944227ceeef3ccc26b19f117ba0c0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:03:13 +0000 Subject: [PATCH 03/36] =?UTF-8?q?`=5Fselect=5Fother=5Ffiles`=20returns=20n?= =?UTF-8?q?on-core=20evidence=20files=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 12 ++++++--- backend/pashub_fetcher/pashub_service.py | 10 ++++---- .../pashub_to_ara_trigger_request.py | 2 ++ .../tests/test_pashub_client.py | 25 +++++++++++++++++++ 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 79d81838..30c1ee76 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -92,6 +92,12 @@ class PashubClient: ) return None + def _select_other_files( + self, + files: List[EvidenceFileData], + ) -> List[EvidenceFileData]: + raise NotImplementedError + def _select_latest_core_files( self, files: List[EvidenceFileData], @@ -106,16 +112,16 @@ class PashubClient: continue grouped[core_type].append(file) - latest_files: Dict[CoreFiles, EvidenceFileData] = {} + latest_core_files: Dict[CoreFiles, EvidenceFileData] = {} for core_type, group in grouped.items(): if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1: osm_candidates = [f for f in group if "-OSM-" in f.file_name] group = osm_candidates if osm_candidates else group latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc)) - latest_files[core_type] = latest + latest_core_files[core_type] = latest - return latest_files + return latest_core_files def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]: url = f"{self.base}/jobs/{job_id}/evidence" diff --git 
a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index f7f6ccd9..6138abe9 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -75,14 +75,14 @@ class PashubService: logger.info(f"No UPRN found for job {job_id}") try: - job_files: List[str] = active_client.get_core_evidence_files_by_job_id( + core_files: List[str] = active_client.get_core_evidence_files_by_job_id( job_id ) except UnauthorizedError: if active_client is not self._pashub_client: raise active_client = self._get_coordination_client() - job_files = active_client.get_core_evidence_files_by_job_id(job_id) + core_files = active_client.get_core_evidence_files_by_job_id(job_id) if uprn or hubspot_deal_id: logger.info("Uploading files to s3") @@ -92,7 +92,7 @@ class PashubService: else FileSourceEnum.COORDINATION_HUB ) upload_records = self._upload_to_s3_and_update_db( - job_files, uprn, hubspot_deal_id, file_source + core_files, uprn, hubspot_deal_id, file_source ) self._save_site_notes(upload_records) @@ -101,13 +101,13 @@ class PashubService: # if request.sharepoint_link: # self._upload_to_sharepoint(request.sharepoint_link, job_files) - for file_path in job_files: + for file_path in core_files: try: os.remove(file_path) except OSError: logger.warning(f"Failed to delete temp file {file_path}") - return job_files + return core_files def _upload_to_s3_and_update_db( self, diff --git a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py index 715a09f8..7fb00508 100644 --- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py +++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py @@ -14,6 +14,8 @@ class PashubToAraTriggerRequest(BaseModel): hubspot_listing_id: Optional[int] = None hubspot_deal_id: Optional[str] = None + get_other_files: Optional[bool] = False + @property def pashub_job_id(self) -> str: match = re.search(r"/jobs/([^/]+)", 
self.pashub_link) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 34260c73..a58b245e 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -25,6 +25,31 @@ def make_file( ) +# --------------------------------------------------------------------------- +# _select_other_files +# --------------------------------------------------------------------------- + + +def test_select_other_files_returns_non_core_files_only() -> None: + # Arrange + client = make_client() + files = [ + make_file(file_name="SiteNote_001.pdf"), + make_file(file_name="some_unknown_document.pdf"), + make_file(file_name="another_random_file.docx"), + make_file(file_name="Photopack_002.pdf"), + ] + + # Act + result = client._select_other_files(files) + + # Assert + assert [f.file_name for f in result] == [ + "some_unknown_document.pdf", + "another_random_file.docx", + ] + + # --------------------------------------------------------------------------- # _select_latest_core_files # --------------------------------------------------------------------------- From de9ec989d3c66b1a4061145bc5b8ee803f119a6e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:04:28 +0000 Subject: [PATCH 04/36] =?UTF-8?q?`=5Fselect=5Fother=5Ffiles`=20returns=20n?= =?UTF-8?q?on-core=20evidence=20files=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 30c1ee76..5fc32954 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -96,7 +96,7 @@ class PashubClient: self, files: List[EvidenceFileData], ) -> List[EvidenceFileData]: - raise NotImplementedError + return [f for f in files 
if get_core_file_type(f.file_name, f.evidence_category) is None] def _select_latest_core_files( self, From a1620f501563da58d5472341071ea36f9f25af7d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:07:19 +0000 Subject: [PATCH 05/36] =?UTF-8?q?Group=20evidence=20into=20core=20and=20ot?= =?UTF-8?q?her=20via=20`=5Fgroup=5Finto=5Fcore=5Fand=5Fother=5Ffiles`=20?= =?UTF-8?q?=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 25 +++++------ .../tests/test_pashub_client.py | 43 ++++++++----------- 2 files changed, 30 insertions(+), 38 deletions(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 5fc32954..26061e3e 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -1,6 +1,6 @@ from collections import defaultdict import os -from typing import Dict, List, Optional +from typing import Dict, List, NamedTuple, Optional from datetime import datetime import requests @@ -13,6 +13,11 @@ from utils.logger import setup_logger logger = setup_logger() +class _EvidenceFileGroups(NamedTuple): + core: Dict[CoreFiles, EvidenceFileData] + other: List[EvidenceFileData] + + class UnauthorizedError(Exception): pass @@ -42,9 +47,9 @@ class PashubClient: saved_files: List[str] = [] - core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files( + core_files: Dict[CoreFiles, EvidenceFileData] = self._group_into_core_and_other_files( evidence_list - ) + ).core logger.info(f"Number of core files to download is {len(core_files)}") @@ -92,23 +97,19 @@ class PashubClient: ) return None - def _select_other_files( + def _group_into_core_and_other_files( self, files: List[EvidenceFileData], - ) -> List[EvidenceFileData]: - return [f for f in files if get_core_file_type(f.file_name, f.evidence_category) is None] - - def _select_latest_core_files( - self, - files: 
List[EvidenceFileData], - ) -> Dict[CoreFiles, EvidenceFileData]: + ) -> _EvidenceFileGroups: grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list) + other: List[EvidenceFileData] = [] for file in files: core_type: Optional[CoreFiles] = get_core_file_type( file.file_name, file.evidence_category ) if not core_type: + other.append(file) continue grouped[core_type].append(file) @@ -121,7 +122,7 @@ class PashubClient: latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc)) latest_core_files[core_type] = latest - return latest_core_files + return _EvidenceFileGroups(core=latest_core_files, other=other) def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]: url = f"{self.base}/jobs/{job_id}/evidence" diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index a58b245e..aa6943c2 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -26,36 +26,27 @@ def make_file( # --------------------------------------------------------------------------- -# _select_other_files +# _group_into_core_and_other_files # --------------------------------------------------------------------------- -def test_select_other_files_returns_non_core_files_only() -> None: +def test_group_into_core_and_other_files_classifies_core_and_other_correctly() -> None: # Arrange client = make_client() files = [ make_file(file_name="SiteNote_001.pdf"), make_file(file_name="some_unknown_document.pdf"), - make_file(file_name="another_random_file.docx"), - make_file(file_name="Photopack_002.pdf"), ] # Act - result = client._select_other_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert [f.file_name for f in result] == [ - "some_unknown_document.pdf", - "another_random_file.docx", - ] + assert CoreFiles.SITENOTE in result.core + assert [f.file_name for f in result.other] == ["some_unknown_document.pdf"] 
-# --------------------------------------------------------------------------- -# _select_latest_core_files -# --------------------------------------------------------------------------- - - -def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: +def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> None: # Arrange client = make_client() files = [ @@ -67,13 +58,13 @@ def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" -def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: +def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None: # Arrange - the non-OSM file is newer but should lose to the OSM file client = make_client() files = [ @@ -90,13 +81,13 @@ def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" -def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None: +def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> None: # Arrange client = make_client() files = [ @@ -113,13 +104,13 @@ def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() - ] # Act - result = client._select_latest_core_files(files) + result = 
client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" + assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" -def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None: +def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> None: # Arrange client = make_client() files = [ @@ -136,7 +127,7 @@ def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" From 15e37ef0e043f98ef14639ef77bd07188930c027 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:09:49 +0000 Subject: [PATCH 06/36] =?UTF-8?q?`get=5Fevidence=5Ffiles=5Fby=5Fjob=5Fid`?= =?UTF-8?q?=20returns=20`DownloadedFiles`=20with=20empty=20`other`=20when?= =?UTF-8?q?=20`include=5Fother=3DFalse`=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 10 +++++ .../tests/test_pashub_client.py | 38 ++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 26061e3e..0234d0af 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -18,6 +18,11 @@ class _EvidenceFileGroups(NamedTuple): other: List[EvidenceFileData] +class DownloadedFiles(NamedTuple): + core: List[str] + other: List[str] + + class UnauthorizedError(Exception): pass @@ -75,6 +80,11 @@ class PashubClient: return saved_files + def 
get_evidence_files_by_job_id( + self, job_id: str, include_other: bool = False + ) -> DownloadedFiles: + raise NotImplementedError + def get_uprn_by_job_id(self, job_id: str) -> Optional[str]: logger.info(f"Getting UPRN for job ID {job_id}") url = f"{self.base}/jobs/{job_id}" diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index aa6943c2..fa23378d 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -1,9 +1,18 @@ # pyright: reportPrivateUsage=false from typing import Optional +from unittest.mock import patch from backend.pashub_fetcher.core_files import CoreFiles from backend.pashub_fetcher.evidence_file_data import EvidenceFileData -from backend.pashub_fetcher.pashub_client import PashubClient +from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata +from backend.pashub_fetcher.pashub_client import DownloadedFiles, PashubClient + + +def make_metadata() -> EvidenceMetadata: + return EvidenceMetadata( + container_name="my-container", + blob_uri="https://storage.example.com/blob?sas=token", + ) def make_client() -> PashubClient: @@ -131,3 +140,30 @@ def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candid # Assert assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + + +# --------------------------------------------------------------------------- +# get_evidence_files_by_job_id +# --------------------------------------------------------------------------- + + +def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_when_include_other_false() -> None: + # Arrange + client = make_client() + files = [ + make_file(file_name="SiteNote_001.pdf"), + make_file(file_name="unknown_doc.pdf"), + ] + + # Act + with ( + patch.object(client, "_get_evidence_list", return_value=files), + patch.object(client, "_get_evidence_metadata", 
return_value=make_metadata()), + patch.object(client, "_download_file"), + ): + result = client.get_evidence_files_by_job_id("job-1", include_other=False) + + # Assert + assert isinstance(result, DownloadedFiles) + assert result.core == ["/tmp/SiteNote_001.pdf"] + assert result.other == [] From 7adcad3ee6772e89b09a94f849ee6b47cf099ab4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:11:35 +0000 Subject: [PATCH 07/36] =?UTF-8?q?`get=5Fevidence=5Ffiles=5Fby=5Fjob=5Fid`?= =?UTF-8?q?=20returns=20`DownloadedFiles`=20with=20empty=20`other`=20when?= =?UTF-8?q?=20`include=5Fother=3DFalse`=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 0234d0af..084d4344 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -83,7 +83,28 @@ class PashubClient: def get_evidence_files_by_job_id( self, job_id: str, include_other: bool = False ) -> DownloadedFiles: - raise NotImplementedError + logger.info(f"Getting evidence files for job ID {job_id}") + evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id) + logger.info(f"Found {len(evidence_list)} evidence files") + if not evidence_list: + return DownloadedFiles(core=[], other=[]) + + grouped = self._group_into_core_and_other_files(evidence_list) + + core_paths: List[str] = [] + for _, evidence in grouped.core.items(): + if not evidence.file_id: + continue + metadata: EvidenceMetadata = self._get_evidence_metadata( + job_id, evidence.file_id + ) + download_url: str = self._build_download_url(metadata, evidence.file_id) + file_path: str = os.path.join("/tmp", evidence.file_name) + self._download_file(download_url, file_path) + logger.info("Successfully downloaded file") + 
core_paths.append(file_path) + + return DownloadedFiles(core=core_paths, other=[]) def get_uprn_by_job_id(self, job_id: str) -> Optional[str]: logger.info(f"Getting UPRN for job ID {job_id}") From 9cf6eaec4b9b6e3482eaca0d8f9772bc401f43b4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:13:20 +0000 Subject: [PATCH 08/36] =?UTF-8?q?`get=5Fevidence=5Ffiles=5Fby=5Fjob=5Fid`?= =?UTF-8?q?=20downloads=20other=20files=20when=20`include=5Fother=3DTrue`?= =?UTF-8?q?=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_client.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index fa23378d..c6e7b780 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -167,3 +167,24 @@ def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_ assert isinstance(result, DownloadedFiles) assert result.core == ["/tmp/SiteNote_001.pdf"] assert result.other == [] + + +def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> None: + # Arrange + client = make_client() + files = [ + make_file(file_name="SiteNote_001.pdf"), + make_file(file_name="unknown_doc.pdf"), + ] + + # Act + with ( + patch.object(client, "_get_evidence_list", return_value=files), + patch.object(client, "_get_evidence_metadata", return_value=make_metadata()), + patch.object(client, "_download_file"), + ): + result = client.get_evidence_files_by_job_id("job-1", include_other=True) + + # Assert + assert result.core == ["/tmp/SiteNote_001.pdf"] + assert result.other == ["/tmp/unknown_doc.pdf"] From ad4b88515da492c4724fdb360cc2f641de34b19b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:14:30 +0000 Subject: [PATCH 09/36] 
=?UTF-8?q?`get=5Fevidence=5Ffiles=5Fby=5Fjob=5Fid`?= =?UTF-8?q?=20downloads=20other=20files=20when=20`include=5Fother=3DTrue`?= =?UTF-8?q?=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 084d4344..da44cf48 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -104,7 +104,19 @@ class PashubClient: logger.info("Successfully downloaded file") core_paths.append(file_path) - return DownloadedFiles(core=core_paths, other=[]) + other_paths: List[str] = [] + if include_other: + for evidence in grouped.other: + if not evidence.file_id: + continue + metadata = self._get_evidence_metadata(job_id, evidence.file_id) + download_url = self._build_download_url(metadata, evidence.file_id) + file_path = os.path.join("/tmp", evidence.file_name) + self._download_file(download_url, file_path) + logger.info("Successfully downloaded other file") + other_paths.append(file_path) + + return DownloadedFiles(core=core_paths, other=other_paths) def get_uprn_by_job_id(self, job_id: str) -> Optional[str]: logger.info(f"Getting UPRN for job ID {job_id}") From 8b6f67b3572e34752074a44010aeb5c6fdeed747 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:51:53 +0000 Subject: [PATCH 10/36] =?UTF-8?q?Wire=20service=20to=20`get=5Fevidence=5Ff?= =?UTF-8?q?iles=5Fby=5Fjob=5Fid`;=20retire=20`get=5Fcore=5Fevidence=5Ffile?= =?UTF-8?q?s=5Fby=5Fjob=5Fid`=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 37 --------- backend/pashub_fetcher/pashub_service.py | 25 ++++-- .../pashub_to_ara_trigger_request.py | 2 +- .../tests/test_pashub_service.py | 78 ++++++++++++------- 4 files 
changed, 66 insertions(+), 76 deletions(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index da44cf48..c3f1caff 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -43,43 +43,6 @@ class PashubClient: ) logger.info("Finished initialising CotalityClient") - def get_core_evidence_files_by_job_id(self, job_id: str) -> List[str]: - logger.info(f"Getting Core Evidence Files for job ID {job_id}") - evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id) - logger.info(f"Found {len(evidence_list)} Evidence files to get") - if not evidence_list: - return [] - - saved_files: List[str] = [] - - core_files: Dict[CoreFiles, EvidenceFileData] = self._group_into_core_and_other_files( - evidence_list - ).core - - logger.info(f"Number of core files to download is {len(core_files)}") - - for _, evidence in core_files.items(): - evidence_id = evidence.file_id - if not evidence_id: - continue - - logger.info(f"Getting metadata for file {evidence.file_name}") - metadata: EvidenceMetadata = self._get_evidence_metadata( - job_id, evidence_id - ) - - download_url: str = self._build_download_url(metadata, evidence.file_id) - output_dir: str = "/tmp" - - file_name: str = evidence.file_name - file_path: str = os.path.join(output_dir, file_name) - - self._download_file(download_url, file_path) - logger.info("Successfully downloaded file") - saved_files.append(file_path) - - return saved_files - def get_evidence_files_by_job_id( self, job_id: str, include_other: bool = False ) -> DownloadedFiles: diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index 6138abe9..c9fce806 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -11,7 +11,11 @@ from backend.app.db.models.uploaded_file import ( from backend.documents_parser.db_writer import save_epc_property_data from 
backend.documents_parser.parser import parse_site_notes_pdf from backend.pashub_fetcher.core_files import get_file_type_string -from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError +from backend.pashub_fetcher.pashub_client import ( + DownloadedFiles, + PashubClient, + UnauthorizedError, +) from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, ) @@ -75,14 +79,16 @@ class PashubService: logger.info(f"No UPRN found for job {job_id}") try: - core_files: List[str] = active_client.get_core_evidence_files_by_job_id( - job_id + downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id( + job_id, include_other=request.get_other_files ) except UnauthorizedError: if active_client is not self._pashub_client: raise active_client = self._get_coordination_client() - core_files = active_client.get_core_evidence_files_by_job_id(job_id) + downloaded = active_client.get_evidence_files_by_job_id( + job_id, include_other=request.get_other_files + ) if uprn or hubspot_deal_id: logger.info("Uploading files to s3") @@ -92,22 +98,25 @@ class PashubService: else FileSourceEnum.COORDINATION_HUB ) upload_records = self._upload_to_s3_and_update_db( - core_files, uprn, hubspot_deal_id, file_source + downloaded.core, uprn, hubspot_deal_id, file_source ) self._save_site_notes(upload_records) # SharePoint upload disabled: pashub sharepoint_link is inconsistent # (points to property or project unpredictably) # if request.sharepoint_link: - # self._upload_to_sharepoint(request.sharepoint_link, job_files) + # self._upload_to_sharepoint(request.sharepoint_link, downloaded.core) - for file_path in core_files: + if request.get_other_files: + pass # TODO: process downloaded.other + + for file_path in downloaded.core: try: os.remove(file_path) except OSError: logger.warning(f"Failed to delete temp file {file_path}") - return core_files + return downloaded.core def _upload_to_s3_and_update_db( self, diff --git 
a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py index 7fb00508..5f6ce37d 100644 --- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py +++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py @@ -14,7 +14,7 @@ class PashubToAraTriggerRequest(BaseModel): hubspot_listing_id: Optional[int] = None hubspot_deal_id: Optional[str] = None - get_other_files: Optional[bool] = False + get_other_files: bool = False @property def pashub_job_id(self) -> str: diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index 1f750117..7ec8dea2 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -4,7 +4,11 @@ from unittest.mock import MagicMock, call, patch from backend.app.db.models.uploaded_file import FileSourceEnum -from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError +from backend.pashub_fetcher.pashub_client import ( + DownloadedFiles, + PashubClient, + UnauthorizedError, +) from backend.pashub_fetcher.pashub_service import PashubService from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, @@ -20,12 +24,14 @@ def make_request( uprn: Optional[str] = None, hubspot_deal_id: Optional[str] = None, sharepoint_link: Optional[str] = None, + get_other_files: bool = False, ) -> PashubToAraTriggerRequest: return PashubToAraTriggerRequest( pashub_link=pashub_link, uprn=uprn, hubspot_deal_id=hubspot_deal_id, sharepoint_link=sharepoint_link, + get_other_files=get_other_files, ) @@ -43,6 +49,10 @@ def make_service( ) +def make_downloaded(core: list[str], other: list[str] = []) -> DownloadedFiles: + return DownloadedFiles(core=core, other=other) + + # --------------------------------------------------------------------------- # run(): returns file paths # 
--------------------------------------------------------------------------- @@ -51,10 +61,9 @@ def make_service( def test_run_returns_file_paths() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/a.pdf", - "/tmp/b.pdf", - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf", "/tmp/b.pdf"] + ) service = make_service(pashub_client=mock_client) @@ -72,7 +81,9 @@ def test_run_returns_file_paths() -> None: def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) service = make_service(pashub_client=mock_client) @@ -93,10 +104,9 @@ def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None: def test_run_uploads_files_to_s3_using_uprn_path() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf", - "/tmp/Photopack_002.pdf", - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf", "/tmp/Photopack_002.pdf"] + ) service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") @@ -132,9 +142,9 @@ def test_run_uploads_files_to_s3_using_uprn_path() -> None: def test_run_persists_uploaded_file_records_to_db() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"] + ) fake_session = 
MagicMock() service = make_service(pashub_client=mock_client) @@ -163,9 +173,9 @@ def test_run_persists_uploaded_file_records_to_db() -> None: def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"] + ) service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") @@ -191,9 +201,9 @@ def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None: def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/RdSAP_SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/RdSAP_SiteNote_001.pdf"] + ) fake_epc_data = MagicMock() fake_session = MagicMock() @@ -241,7 +251,9 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: coord_client = MagicMock(spec=PashubClient) coord_client.get_uprn_by_job_id.return_value = "99999" - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) @@ -256,16 +268,18 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: assert result == ["/tmp/a.pdf"] coord_client.get_uprn_by_job_id.assert_called_once() - coord_client.get_core_evidence_files_by_job_id.assert_called_once() + coord_client.get_evidence_files_by_job_id.assert_called_once() assert factory.call_count == 1 def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: pas_client = 
MagicMock(spec=PashubClient) - pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError() + pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) @@ -279,7 +293,7 @@ def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: result = service.run(make_request(uprn="12345")) assert result == ["/tmp/a.pdf"] - coord_client.get_core_evidence_files_by_job_id.assert_called_once() + coord_client.get_evidence_files_by_job_id.assert_called_once() pas_client.get_uprn_by_job_id.assert_not_called() @@ -314,7 +328,9 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() coord_client = MagicMock(spec=PashubClient) coord_client.get_uprn_by_job_id.return_value = "99999" - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) fake_session = MagicMock() @@ -336,10 +352,12 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None: pas_client = MagicMock(spec=PashubClient) - pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError() + pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) fake_session = MagicMock() @@ -362,9 +380,9 @@ def 
test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing( def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/RdSAP_SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/RdSAP_SiteNote_001.pdf"] + ) service = make_service(pashub_client=mock_client) From d5a33573430e26944294bef64bfba8b257a12848 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:52:44 +0000 Subject: [PATCH 11/36] =?UTF-8?q?Service=20deletes=20other-file=20temp=20p?= =?UTF-8?q?aths=20after=20run=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_service.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index 7ec8dea2..eec928f3 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -377,6 +377,31 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing( assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value +# --------------------------------------------------------------------------- +# run(): get_other_files=True → other temp files deleted after run +# --------------------------------------------------------------------------- + + +def test_run_deletes_other_temp_files_when_get_other_files_true() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"], + other=["/tmp/other.pdf"], + ) + + service = make_service(pashub_client=mock_client) + + # Act 
+ with patch("backend.pashub_fetcher.pashub_service.os.remove") as mock_remove: + service.run(make_request(get_other_files=True)) + + # Assert + mock_remove.assert_any_call("/tmp/core.pdf") + mock_remove.assert_any_call("/tmp/other.pdf") + + def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None From 60447a58e3b62d18d7bc31534d90ffc9409d85ad Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:53:28 +0000 Subject: [PATCH 12/36] =?UTF-8?q?Service=20deletes=20other-file=20temp=20p?= =?UTF-8?q?aths=20after=20run=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index c9fce806..a4b34cab 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -110,7 +110,7 @@ class PashubService: if request.get_other_files: pass # TODO: process downloaded.other - for file_path in downloaded.core: + for file_path in downloaded.core + downloaded.other: try: os.remove(file_path) except OSError: From 9b74bc5f3f0006e5d3bd540b56dfe0eaad516473 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 08:09:36 +0000 Subject: [PATCH 13/36] tidying for readability --- backend/pashub_fetcher/pashub_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index c3f1caff..45767874 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -68,13 +68,16 @@ class PashubClient: core_paths.append(file_path) other_paths: List[str] = [] + if include_other: for evidence in grouped.other: if not evidence.file_id: continue + metadata = 
self._get_evidence_metadata(job_id, evidence.file_id) download_url = self._build_download_url(metadata, evidence.file_id) file_path = os.path.join("/tmp", evidence.file_name) + self._download_file(download_url, file_path) logger.info("Successfully downloaded other file") other_paths.append(file_path) From 1a1f9f2e6a5c2bd7078149af54db327e3a796a27 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 09:35:12 +0000 Subject: [PATCH 14/36] =?UTF-8?q?Upload=20other=20files=20to=20S3=20when?= =?UTF-8?q?=20get=5Fother=5Ffiles=20is=20True=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/db/models/uploaded_file.py | 1 + backend/pashub_fetcher/pashub_service.py | 2 +- .../tests/test_pashub_service.py | 32 +++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index b6a73d5d..b5367305 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -21,6 +21,7 @@ class FileTypeEnum(enum.Enum): IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation" MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan" RETROFIT_DESIGN_DOC = "retrofit_design_doc" + OTHER = "other" class FileSourceEnum(enum.Enum): diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index a4b34cab..a54036a8 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -86,7 +86,7 @@ class PashubService: if active_client is not self._pashub_client: raise active_client = self._get_coordination_client() - downloaded = active_client.get_evidence_files_by_job_id( + downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id( job_id, include_other=request.get_other_files ) diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py 
b/backend/pashub_fetcher/tests/test_pashub_service.py index eec928f3..d8467209 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -402,6 +402,38 @@ def test_run_deletes_other_temp_files_when_get_other_files_true() -> None: mock_remove.assert_any_call("/tmp/other.pdf") +# --------------------------------------------------------------------------- +# run(): get_other_files=True → other files uploaded to S3 +# --------------------------------------------------------------------------- + + +def test_run_uploads_other_files_to_s3_when_get_other_files_true() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"], + other=["/tmp/unknown_file.pdf"], + ) + + service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as mock_s3, + patch("backend.pashub_fetcher.pashub_service.db_session"), + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + service.run(make_request(uprn="12345", get_other_files=True)) + + # Assert + mock_s3.assert_any_call( + "/tmp/unknown_file.pdf", + "my-bucket", + "documents/uprn/12345/unknown_file.pdf", + ) + + def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None From ecca84294ad0cac2f9b21c61c70cdfe8d662ba8c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 09:37:17 +0000 Subject: [PATCH 15/36] =?UTF-8?q?Upload=20other=20files=20to=20S3=20when?= =?UTF-8?q?=20get=5Fother=5Ffiles=20is=20True=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_service.py | 8 +++++--- 1 file changed, 5 
insertions(+), 3 deletions(-) diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index a54036a8..3465a46e 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -102,14 +102,16 @@ class PashubService: ) self._save_site_notes(upload_records) + if downloaded.other: + self._upload_to_s3_and_update_db( + downloaded.other, uprn, hubspot_deal_id, file_source + ) + # SharePoint upload disabled: pashub sharepoint_link is inconsistent # (points to property or project unpredictably) # if request.sharepoint_link: # self._upload_to_sharepoint(request.sharepoint_link, downloaded.core) - if request.get_other_files: - pass # TODO: process downloaded.other - for file_path in downloaded.core + downloaded.other: try: os.remove(file_path) From af61c362b204181822ab3c97e4e4e99514034c30 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 09:39:39 +0000 Subject: [PATCH 16/36] =?UTF-8?q?Other=20files=20persisted=20to=20DB=20wit?= =?UTF-8?q?h=20file=5Ftype=20OTHER=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_service.py | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index d8467209..6cf889a2 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -3,7 +3,7 @@ from typing import Any, Callable, Optional from unittest.mock import MagicMock, call, patch -from backend.app.db.models.uploaded_file import FileSourceEnum +from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum from backend.pashub_fetcher.pashub_client import ( DownloadedFiles, PashubClient, @@ -434,6 +434,42 @@ def test_run_uploads_other_files_to_s3_when_get_other_files_true() -> None: ) +# 
--------------------------------------------------------------------------- +# run(): get_other_files=True → other files persisted with file_type OTHER +# --------------------------------------------------------------------------- + + +def test_run_persists_other_files_with_other_file_type() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=[], + other=["/tmp/unknown_file.pdf"], + ) + + fake_session = MagicMock() + service = make_service(pashub_client=mock_client) + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), + patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db, + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + mock_db.return_value.__enter__.return_value = fake_session + service.run(make_request(uprn="12345", get_other_files=True)) + + # Assert + all_added = [ + item + for c in fake_session.add_all.call_args_list + for item in c[0][0] + ] + assert len(all_added) == 1 + assert all_added[0].file_type == FileTypeEnum.OTHER.value + + def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None From fa450a37a36a4868cd0d245b0f2a1fa15d6e4ea0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 09:41:39 +0000 Subject: [PATCH 17/36] =?UTF-8?q?Other=20files=20persisted=20to=20DB=20wit?= =?UTF-8?q?h=20file=5Ftype=20OTHER=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_service.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index 3465a46e..305c7252 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ 
b/backend/pashub_fetcher/pashub_service.py @@ -104,7 +104,11 @@ class PashubService: if downloaded.other: self._upload_to_s3_and_update_db( - downloaded.other, uprn, hubspot_deal_id, file_source + downloaded.other, + uprn, + hubspot_deal_id, + file_source, + default_file_type=FileTypeEnum.OTHER.value, ) # SharePoint upload disabled: pashub sharepoint_link is inconsistent @@ -126,6 +130,7 @@ class PashubService: uprn: Optional[str], hubspot_deal_id: Optional[str], file_source: FileSourceEnum, + default_file_type: Optional[str] = None, ) -> List[_FileUploadRecord]: if not uprn and not hubspot_deal_id: return [] @@ -152,7 +157,7 @@ class PashubService: uprn=int(uprn) if uprn else None, hubspot_deal_id=hubspot_deal_id, file_source=file_source.value, - file_type=get_file_type_string(filename), + file_type=get_file_type_string(filename) or default_file_type, ) file_paths.append(file_path) uploaded_files.append(uploaded_file) From 9efd4e34c2973c41d50d5a65ac1b7f549de65ed9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 09:42:41 +0000 Subject: [PATCH 18/36] =?UTF-8?q?run()=20returns=20core=20and=20other=20fi?= =?UTF-8?q?le=20paths=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_service.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index 6cf889a2..df77287d 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -73,6 +73,30 @@ def test_run_returns_file_paths() -> None: assert result == ["/tmp/a.pdf", "/tmp/b.pdf"] +# --------------------------------------------------------------------------- +# run(): returns core + other file paths when get_other_files=True +# --------------------------------------------------------------------------- + + +def 
test_run_returns_core_and_other_file_paths() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"], + other=["/tmp/other.pdf"], + ) + + service = make_service(pashub_client=mock_client) + + # Act + with patch("backend.pashub_fetcher.pashub_service.os.remove"): + result = service.run(make_request(get_other_files=True)) + + # Assert + assert result == ["/tmp/core.pdf", "/tmp/other.pdf"] + + # --------------------------------------------------------------------------- # run(): skips upload when neither uprn nor hubspot_deal_id # --------------------------------------------------------------------------- From 62676cfbc90ad38e9b228767e2f52a4625cbad9b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 09:43:48 +0000 Subject: [PATCH 19/36] =?UTF-8?q?run()=20returns=20core=20and=20other=20fi?= =?UTF-8?q?le=20paths=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index 305c7252..8442adc3 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -122,7 +122,7 @@ class PashubService: except OSError: logger.warning(f"Failed to delete temp file {file_path}") - return downloaded.core + return downloaded.core + downloaded.other def _upload_to_s3_and_update_db( self, From 5df874b9782e37d3b9fd03afb3b5b3264463902e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 12:54:12 +0000 Subject: [PATCH 20/36] evidence categories plus typehinting --- backend/pashub_fetcher/evidence_categories.py | 63 +++++++++++++++++++ backend/pashub_fetcher/pashub_client.py | 4 +- 2 files changed, 66 insertions(+), 1 
deletion(-) create mode 100644 backend/pashub_fetcher/evidence_categories.py diff --git a/backend/pashub_fetcher/evidence_categories.py b/backend/pashub_fetcher/evidence_categories.py new file mode 100644 index 00000000..4e412364 --- /dev/null +++ b/backend/pashub_fetcher/evidence_categories.py @@ -0,0 +1,63 @@ +EVIDENCE_CATEGORIES = [ + "Advice report", + "Air Tests - BGV", + "Air Tightness Strategy", + "Assessment report", + "Blue Site Notes (PAS Assessment)", + "Building Assessment report", + "Building Condition report", + "Building Regulations Sign-off", + "Claim of compliance PAS2030", + "Claim of compliance PAS2035", + "Commissioning checklist", + "Condition report", + "Contract / Invoice", + "Electrical Certificate", + "Energy report", + "Evidence of submission to CPS", + "Floor Plan", + "Full Property Assessment", + "Gas Appliance Benchmarking Certificate", + "Gas Appliance Commissioning Checklist", + "Gas Inspection Certificate", + "Handover and Commissioning Documents", + "Handover Documents", + "Handover documents for client", + "Heat Demand Calculations", + "Heritage Impact Assessment", + "Improvement option evaluation", + "Installation Guides", + "Insurance guarantee", + "Intended outcomes", + "MCS Compliance Certificate", + "Medium term improvement plan", + "Medium term low carbon plan", + "Mid Photo", + "Mid-Install Inspection", + "Minor Works Electrical Certificate", + "Monitoring and evaluation outcomes", + "Occupancy assessment", + "Other", + "Other commissioning certificates", + "Photo", + "Post Energy Performance Report (EPR)", + "Post installation RdSAP", + "Post Photo", + "Pre Energy Performance Report (EPR)", + "Pre installation RdSAP", + "Pre Photo", + "Pre-Design Building Survey", + "Pre-Installation Building Inspection", + "Product Data sheets", + "Product warranty", + "Property Assessment", + "Qualifications", + "Retrofit design", + "Risk assessment", + "Significance survey", + "Site Note (Green /Blue) and Certificate(s)", + "Ventilation 
Assessment", + "Ventilation Assessment Checklist", + "Ventilation Report", + "Welsh - Checklist", +] diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 45767874..9969227b 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -52,7 +52,9 @@ class PashubClient: if not evidence_list: return DownloadedFiles(core=[], other=[]) - grouped = self._group_into_core_and_other_files(evidence_list) + grouped: _EvidenceFileGroups = self._group_into_core_and_other_files( + evidence_list + ) core_paths: List[str] = [] for _, evidence in grouped.core.items(): From ca224515f972f09cfd58438ea6b0ba630b4f5bd5 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 14:20:35 +0000 Subject: [PATCH 21/36] =?UTF-8?q?MCS=20cert=20identified=20by=20evidence?= =?UTF-8?q?=5Fcategory=20in=20get=5Fcore=5Ffile=5Ftype=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/db/models/uploaded_file.py | 1 + backend/pashub_fetcher/core_files.py | 7 +++- .../pashub_fetcher/tests/test_core_files.py | 33 +++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index b5367305..a3ea70ca 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -21,6 +21,7 @@ class FileTypeEnum(enum.Enum): IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation" MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan" RETROFIT_DESIGN_DOC = "retrofit_design_doc" + MCS_CERTIFICATE = "mcs_certificate" OTHER = "other" diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index e63511eb..1ee3046b 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -17,6 +17,7 @@ class CoreFiles(Enum): IMPROVEMENT_OPTION_EVALUATION = 
"Improvement Option Evaluation" MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan" RETROFIT_DESIGN_DOC = "Retrofit Design Doc" + MCS_CERTIFICATE = "MCS Certificate" _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { @@ -32,6 +33,7 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value, + CoreFiles.MCS_CERTIFICATE: FileTypeEnum.MCS_CERTIFICATE.value, } @@ -56,6 +58,7 @@ def get_core_file_type( CoreFiles.RETROFIT_DESIGN_DOC, CoreFiles.IMPROVEMENT_OPTION_EVALUATION, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN, + CoreFiles.MCS_CERTIFICATE, } for core_file in CoreFiles: @@ -68,7 +71,9 @@ def get_core_file_type( return None -def get_file_type_string(filename: str) -> Optional[str]: +def get_file_type_string( + filename: str, evidence_category: Optional[str] = None +) -> Optional[str]: core_file: Optional[CoreFiles] = get_core_file_type(filename) if core_file is None: diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 3c1d11b8..c14a5a83 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -183,3 +183,36 @@ def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present # Assert assert result is None + + +def test_core_file_for_mcs_compliance_certificate_returns_mcs_certificate() -> None: + # Arrange + filename = "MCS_cert_job123.pdf" + + # Act + result = get_core_file_type(filename, evidence_category="mcs compliance certificate") + + # Assert + assert result == CoreFiles.MCS_CERTIFICATE + + +def test_core_file_for_mcs_compliance_certificate_is_case_insensitive() -> None: + # Arrange + filename = "some_cert.pdf" + + # Act + result = get_core_file_type(filename, 
evidence_category="MCS Compliance Certificate") + + # Assert + assert result == CoreFiles.MCS_CERTIFICATE + + +def test_get_file_type_string_with_mcs_evidence_category_returns_mcs_certificate() -> None: + # Arrange + filename = "some_cert.pdf" + + # Act + result = get_file_type_string(filename, evidence_category="MCS Compliance Certificate") + + # Assert + assert result == "mcs_certificate" From 98eccda0ba695c6f221f634e197612cc10f0b277 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 14:21:58 +0000 Subject: [PATCH 22/36] =?UTF-8?q?MCS=20cert=20identified=20by=20evidence?= =?UTF-8?q?=5Fcategory=20in=20get=5Fcore=5Ffile=5Ftype=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/core_files.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 1ee3046b..1717c108 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -40,6 +40,9 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { def get_core_file_type( filename: str, evidence_category: Optional[str] = None ) -> Optional[CoreFiles]: + if evidence_category is not None and evidence_category.lower() == "mcs compliance certificate": + return CoreFiles.MCS_CERTIFICATE + # Identify retrofit design doc using evidence category as the name is possibly unreliable. 
# We might change to always use evidence category, but needs more investigation if evidence_category is not None and evidence_category.lower() == "retrofit design": @@ -74,7 +77,7 @@ def get_core_file_type( def get_file_type_string( filename: str, evidence_category: Optional[str] = None ) -> Optional[str]: - core_file: Optional[CoreFiles] = get_core_file_type(filename) + core_file: Optional[CoreFiles] = get_core_file_type(filename, evidence_category) if core_file is None: return None From d68cbb44f8fca4b37b651652a22eabe8a1febdf2 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 14:24:24 +0000 Subject: [PATCH 23/36] =?UTF-8?q?Downloaded=20files=20carry=20evidence=5Fc?= =?UTF-8?q?ategory=20as=20DownloadedFile=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 6 +++++ .../tests/test_pashub_client.py | 25 ++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 9969227b..a6284d27 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -13,6 +13,12 @@ from utils.logger import setup_logger logger = setup_logger() +class DownloadedFile(NamedTuple): + file_path: str + evidence_category: Optional[str] + created_utc: datetime + + class _EvidenceFileGroups(NamedTuple): core: Dict[CoreFiles, EvidenceFileData] other: List[EvidenceFileData] diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index c6e7b780..f072731b 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -5,7 +5,7 @@ from unittest.mock import patch from backend.pashub_fetcher.core_files import CoreFiles from backend.pashub_fetcher.evidence_file_data import EvidenceFileData from 
backend.pashub_fetcher.evidence_metadata import EvidenceMetadata -from backend.pashub_fetcher.pashub_client import DownloadedFiles, PashubClient +from backend.pashub_fetcher.pashub_client import DownloadedFile, DownloadedFiles, PashubClient def make_metadata() -> EvidenceMetadata: @@ -169,6 +169,29 @@ def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_ assert result.other == [] +def test_get_evidence_files_by_job_id_core_files_carry_evidence_category() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="MCS_cert.pdf", + evidence_category="MCS Compliance Certificate", + ), + ] + + # Act + with ( + patch.object(client, "_get_evidence_list", return_value=files), + patch.object(client, "_get_evidence_metadata", return_value=make_metadata()), + patch.object(client, "_download_file"), + ): + result = client.get_evidence_files_by_job_id("job-1", include_other=False) + + # Assert + assert len(result.core) == 1 + assert result.core[0].evidence_category == "MCS Compliance Certificate" + + def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> None: # Arrange client = make_client() From bdc35573b57733465ad165d48224327f1ba9ecbc Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 14:27:54 +0000 Subject: [PATCH 24/36] =?UTF-8?q?Downloaded=20files=20carry=20evidence=5Fc?= =?UTF-8?q?ategory=20as=20DownloadedFile=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 26 +++++++--- backend/pashub_fetcher/pashub_service.py | 21 +++++---- .../tests/test_pashub_client.py | 47 +++++++++++++++++-- .../tests/test_pashub_service.py | 10 +++- 4 files changed, 83 insertions(+), 21 deletions(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index a6284d27..e10fbec7 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ 
b/backend/pashub_fetcher/pashub_client.py @@ -25,8 +25,8 @@ class _EvidenceFileGroups(NamedTuple): class DownloadedFiles(NamedTuple): - core: List[str] - other: List[str] + core: List[DownloadedFile] + other: List[DownloadedFile] class UnauthorizedError(Exception): @@ -62,7 +62,7 @@ class PashubClient: evidence_list ) - core_paths: List[str] = [] + core_files: List[DownloadedFile] = [] for _, evidence in grouped.core.items(): if not evidence.file_id: continue @@ -73,9 +73,15 @@ class PashubClient: file_path: str = os.path.join("/tmp", evidence.file_name) self._download_file(download_url, file_path) logger.info("Successfully downloaded file") - core_paths.append(file_path) + core_files.append( + DownloadedFile( + file_path=file_path, + evidence_category=evidence.evidence_category, + created_utc=datetime.fromisoformat(evidence.created_utc), + ) + ) - other_paths: List[str] = [] + other_files: List[DownloadedFile] = [] if include_other: for evidence in grouped.other: @@ -88,9 +94,15 @@ class PashubClient: self._download_file(download_url, file_path) logger.info("Successfully downloaded other file") - other_paths.append(file_path) + other_files.append( + DownloadedFile( + file_path=file_path, + evidence_category=evidence.evidence_category, + created_utc=datetime.fromisoformat(evidence.created_utc), + ) + ) - return DownloadedFiles(core=core_paths, other=other_paths) + return DownloadedFiles(core=core_files, other=other_files) def get_uprn_by_job_id(self, job_id: str) -> Optional[str]: logger.info(f"Getting UPRN for job ID {job_id}") diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index 8442adc3..881fc583 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -12,6 +12,7 @@ from backend.documents_parser.db_writer import save_epc_property_data from backend.documents_parser.parser import parse_site_notes_pdf from backend.pashub_fetcher.core_files import get_file_type_string 
from backend.pashub_fetcher.pashub_client import ( + DownloadedFile, DownloadedFiles, PashubClient, UnauthorizedError, @@ -116,17 +117,17 @@ class PashubService: # if request.sharepoint_link: # self._upload_to_sharepoint(request.sharepoint_link, downloaded.core) - for file_path in downloaded.core + downloaded.other: + for df in downloaded.core + downloaded.other: try: - os.remove(file_path) + os.remove(df.file_path) except OSError: - logger.warning(f"Failed to delete temp file {file_path}") + logger.warning(f"Failed to delete temp file {df.file_path}") - return downloaded.core + downloaded.other + return [df.file_path for df in downloaded.core + downloaded.other] def _upload_to_s3_and_update_db( self, - job_files: List[str], + job_files: List[DownloadedFile], uprn: Optional[str], hubspot_deal_id: Optional[str], file_source: FileSourceEnum, @@ -144,11 +145,11 @@ class PashubService: file_paths: List[str] = [] uploaded_files: List[UploadedFile] = [] - for file_path in job_files: - filename = os.path.basename(file_path) + for df in job_files: + filename = os.path.basename(df.file_path) file_key = f"{base_path}/{filename}" - upload_file_to_s3(file_path, self._s3_bucket, file_key) + upload_file_to_s3(df.file_path, self._s3_bucket, file_key) uploaded_file = UploadedFile( s3_file_bucket=self._s3_bucket, @@ -157,9 +158,9 @@ class PashubService: uprn=int(uprn) if uprn else None, hubspot_deal_id=hubspot_deal_id, file_source=file_source.value, - file_type=get_file_type_string(filename) or default_file_type, + file_type=get_file_type_string(filename, df.evidence_category) or default_file_type, ) - file_paths.append(file_path) + file_paths.append(df.file_path) uploaded_files.append(uploaded_file) with db_session() as session: diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index f072731b..50c91b85 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ 
b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -119,6 +119,47 @@ def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_ assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" +def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="MCS_cert_job123.pdf", + evidence_category="MCS Compliance Certificate", + ), + ] + + # Act + result = client._group_into_core_and_other_files(files) + + # Assert + assert CoreFiles.MCS_CERTIFICATE in result.core + assert result.other == [] + + +def test_group_into_core_and_other_files_picks_most_recent_mcs_cert() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="mcs_cert_old.pdf", + evidence_category="MCS Compliance Certificate", + created_utc="2024-01-01T00:00:00", + ), + make_file( + file_name="mcs_cert_new.pdf", + evidence_category="MCS Compliance Certificate", + created_utc="2024-06-01T00:00:00", + ), + ] + + # Act + result = client._group_into_core_and_other_files(files) + + # Assert + assert result.core[CoreFiles.MCS_CERTIFICATE].file_name == "mcs_cert_new.pdf" + + def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> None: # Arrange client = make_client() @@ -165,7 +206,7 @@ def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_ # Assert assert isinstance(result, DownloadedFiles) - assert result.core == ["/tmp/SiteNote_001.pdf"] + assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"] assert result.other == [] @@ -209,5 +250,5 @@ def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_t result = client.get_evidence_files_by_job_id("job-1", include_other=True) # Assert - assert result.core == ["/tmp/SiteNote_001.pdf"] - assert result.other == ["/tmp/unknown_doc.pdf"] + assert [df.file_path for df in 
result.core] == ["/tmp/SiteNote_001.pdf"] + assert [df.file_path for df in result.other] == ["/tmp/unknown_doc.pdf"] diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index df77287d..988dc854 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -1,10 +1,12 @@ import pytest +from datetime import datetime from typing import Any, Callable, Optional from unittest.mock import MagicMock, call, patch from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum from backend.pashub_fetcher.pashub_client import ( + DownloadedFile, DownloadedFiles, PashubClient, UnauthorizedError, @@ -49,8 +51,14 @@ def make_service( ) +_DEFAULT_UTC = datetime(2024, 1, 1) + + def make_downloaded(core: list[str], other: list[str] = []) -> DownloadedFiles: - return DownloadedFiles(core=core, other=other) + return DownloadedFiles( + core=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in core], + other=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in other], + ) # --------------------------------------------------------------------------- From 1a1804399e6dad920a90aeb861d6f8223a001159 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 14:29:52 +0000 Subject: [PATCH 25/36] =?UTF-8?q?Service=20persists=20mcs=5Fcertificate=20?= =?UTF-8?q?file=5Ftype=20when=20evidence=5Fcategory=20is=20MCS=20cert=20?= =?UTF-8?q?=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_service.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index 988dc854..c00e12b1 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -502,6 +502,33 @@ def 
test_run_persists_other_files_with_other_file_type() -> None: assert all_added[0].file_type == FileTypeEnum.OTHER.value +def test_run_persists_mcs_cert_with_mcs_certificate_file_type() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = DownloadedFiles( + core=[DownloadedFile("/tmp/MCS_cert.pdf", "MCS Compliance Certificate", datetime(2024, 1, 1))], + other=[], + ) + + fake_session = MagicMock() + service = make_service(pashub_client=mock_client) + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), + patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db, + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + mock_db.return_value.__enter__.return_value = fake_session + service.run(make_request(uprn="12345")) + + # Assert + fake_session.add_all.assert_called_once() + added: list[Any] = fake_session.add_all.call_args[0][0] + assert added[0].file_type == FileTypeEnum.MCS_CERTIFICATE.value + + def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None From 223c1a259ba3fa58f0dd2d7fa96017ade9aee4f3 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 2 Jun 2026 14:32:13 +0000 Subject: [PATCH 26/36] adjust comment about evidence type --- backend/pashub_fetcher/core_files.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 1717c108..00129954 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -40,11 +40,14 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { def get_core_file_type( filename: str, evidence_category: Optional[str] = None ) -> Optional[CoreFiles]: - if evidence_category is not None and evidence_category.lower() == "mcs 
compliance certificate": + # Identify MCS certificate and design doc using evidence category as the names are possibly unreliable. + # We might change to always use evidence category, but needs more investigation + if ( + evidence_category is not None + and evidence_category.lower() == "mcs compliance certificate" + ): return CoreFiles.MCS_CERTIFICATE - # Identify retrofit design doc using evidence category as the name is possibly unreliable. - # We might change to always use evidence category, but needs more investigation if evidence_category is not None and evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC From 37ec4095537b0c522e38a4ef5ae3ec9c4ed01314 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 3 Jun 2026 08:21:54 +0000 Subject: [PATCH 27/36] tweak local trigger --- .../trigger_pashub_sqs_from_file.py | 62 ++++++++++--------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py index c751171a..cb6c2ffd 100644 --- a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py +++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py @@ -16,38 +16,40 @@ logger: logging.Logger = logging.getLogger(__name__) DRY_RUN: bool = False -DEAL_ID_FILTER: frozenset[str] = frozenset( - { - "379452094688", - "379466504437", - "379660170452", - "380016925932", - "379848065216", - "379466504434", - "379452094690", - "379965924567", - "380016925923", - "379792072898", - "379654754502", - "379560262861", - "379969670369", - "379248717001", - "379971468493", - "379999888607", - "379606372580", - "379969603797", - "379967743213", - "379263155434", - "379855267025", - "379889899719", - "379071064307", - "379867925741", - } -) +# DEAL_ID_FILTER: frozenset[str] = frozenset( +# { +# "379452094688", +# "379466504437", +# "379660170452", +# "380016925932", +# "379848065216", +# "379466504434", +# "379452094690", +# 
"379965924567", +# "380016925923", +# "379792072898", +# "379654754502", +# "379560262861", +# "379969670369", +# "379248717001", +# "379971468493", +# "379999888607", +# "379606372580", +# "379969603797", +# "379967743213", +# "379263155434", +# "379855267025", +# "379889899719", +# "379071064307", +# "379867925741", +# } +# ) + +DEAL_ID_FILTER = None EXCEL_PATH: str = os.path.join( os.path.dirname(__file__), - "united-infrastructure-exports-all-deals-2026-05-14.xlsx", + "local_run_02-06-2026/NCHA WAVE 3 RAs.xlsx", ) @@ -64,7 +66,7 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: pashub_col: int = headers["PasHub link"] record_id_col: int = headers["Record ID"] deal_name_col: int = headers["Deal Name"] - deal_stage_col: Optional[int] = headers["Deal Stage"] + deal_stage_col: Optional[int] = headers.get("Deal Stage", None) requests: list[PashubToAraTriggerRequest] = [] From 39fa1ccfa0406a29cb6a594c7c332b7615e9df45 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 3 Jun 2026 08:49:37 +0000 Subject: [PATCH 28/36] =?UTF-8?q?All=20downloaded=20PasHub=20files=20uploa?= =?UTF-8?q?ded=20to=20SharePoint=20property=20folder=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_service.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index c00e12b1..522d4086 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -27,6 +27,7 @@ def make_request( hubspot_deal_id: Optional[str] = None, sharepoint_link: Optional[str] = None, get_other_files: bool = False, + address: Optional[str] = None, ) -> PashubToAraTriggerRequest: return PashubToAraTriggerRequest( pashub_link=pashub_link, @@ -34,6 +35,7 @@ def make_request( hubspot_deal_id=hubspot_deal_id, 
sharepoint_link=sharepoint_link, get_other_files=get_other_files, + address=address, ) @@ -529,6 +531,78 @@ def test_run_persists_mcs_cert_with_mcs_certificate_file_type() -> None: assert added[0].file_type == FileTypeEnum.MCS_CERTIFICATE.value +# --------------------------------------------------------------------------- +# run(): SharePoint upload +# --------------------------------------------------------------------------- + + +def test_sharepoint_uploads_all_files_to_property_folder() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"], + other=["/tmp/other.pdf"], + ) + + mock_sharepoint = MagicMock(spec=DomnaSharepointClient) + mock_sharepoint.get_folders_in_path.return_value = { + "value": [{"name": "123 Main St"}] + } + + service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint) + + # Act + with patch("backend.pashub_fetcher.pashub_service.os.remove"): + service.run( + make_request( + sharepoint_link="Retrofit/Properties", + get_other_files=True, + address="123 Main St | some deal", + ) + ) + + # Assert + mock_sharepoint.upload_file.assert_any_call( + "/tmp/core.pdf", "Retrofit/Properties/123 Main St", "core.pdf" + ) + mock_sharepoint.upload_file.assert_any_call( + "/tmp/other.pdf", "Retrofit/Properties/123 Main St", "other.pdf" + ) + + +def test_sharepoint_skips_upload_when_folder_not_found() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"] + ) + + mock_sharepoint = MagicMock(spec=DomnaSharepointClient) + mock_sharepoint.get_folders_in_path.return_value = { + "value": [{"name": "Different Property"}] + } + + service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint) + + # 
Act + with ( + patch("backend.pashub_fetcher.pashub_service.os.remove"), + patch("backend.pashub_fetcher.pashub_service.logger") as mock_logger, + ): + service.run( + make_request( + sharepoint_link="Retrofit/Properties", + address="No Such Property | deal", + ) + ) + + # Assert + mock_sharepoint.upload_file.assert_not_called() + mock_logger.warning.assert_called() + + def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None From 7984a6ded8103d522bea093f87c175b8d8d47529 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 3 Jun 2026 08:52:30 +0000 Subject: [PATCH 29/36] =?UTF-8?q?All=20downloaded=20PasHub=20files=20uploa?= =?UTF-8?q?ded=20to=20SharePoint=20property=20folder=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_service.py | 32 ++++++++++++------- .../trigger_pashub_sqs_from_file.py | 3 ++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index 881fc583..2b7a7f03 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -20,7 +20,6 @@ from backend.pashub_fetcher.pashub_client import ( from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, ) -from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders from datatypes.epc.domain.epc_property_data import EpcPropertyData from utils.logger import setup_logger from utils.s3 import upload_file_to_s3 @@ -112,10 +111,18 @@ class PashubService: default_file_type=FileTypeEnum.OTHER.value, ) - # SharePoint upload disabled: pashub sharepoint_link is inconsistent - # (points to property or project unpredictably) - # if request.sharepoint_link: - # self._upload_to_sharepoint(request.sharepoint_link, downloaded.core) + if 
request.sharepoint_link and request.address: + folder_name = request.address.split("|")[0].strip() + folders = self._sharepoint_client.get_folders_in_path(request.sharepoint_link) + match = next( + (f["name"] for f in folders.get("value", []) if f["name"].lower() == folder_name.lower()), + None, + ) + if match is None: + logger.warning(f"SharePoint folder not found for '{folder_name}' in {request.sharepoint_link}") + else: + property_folder_path = f"{request.sharepoint_link}/{match}" + self._upload_to_sharepoint(property_folder_path, downloaded.core + downloaded.other) for df in downloaded.core + downloaded.other: try: @@ -197,11 +204,12 @@ class PashubService: def _upload_to_sharepoint( self, - sharepoint_link: str, - job_files: List[str], + property_folder_path: str, + files: List[DownloadedFile], ) -> None: - assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}" - - for file_path in job_files: - filename = file_path.split("/")[-1] - self._sharepoint_client.upload_file(file_path, assessment_path, filename) + for df in files: + filename = os.path.basename(df.file_path) + try: + self._sharepoint_client.upload_file(df.file_path, property_folder_path, filename) + except Exception: + logger.warning(f"Failed to upload {filename} to SharePoint", exc_info=True) diff --git a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py index cb6c2ffd..cddd2553 100644 --- a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py +++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py @@ -52,6 +52,8 @@ EXCEL_PATH: str = os.path.join( "local_run_02-06-2026/NCHA WAVE 3 RAs.xlsx", ) +SHAREPOINT_PROPERTIES_FOLDER: str = "" + def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: wb = load_workbook(excel_path, data_only=True) @@ -99,6 +101,7 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: hubspot_deal_id=hubspot_deal_id, address=address, 
deal_stage=deal_stage, + sharepoint_link=SHAREPOINT_PROPERTIES_FOLDER or None, ) ) From 19e40ff04991f8b91bc51cb8b984c9cd699f1110 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 3 Jun 2026 09:00:01 +0000 Subject: [PATCH 30/36] rename mcs_certificate to mcs_compliance_certificate to match existing db enum value --- backend/app/db/models/uploaded_file.py | 2 +- backend/pashub_fetcher/core_files.py | 8 ++-- .../pashub_fetcher/tests/test_core_files.py | 24 ++++++---- .../tests/test_pashub_client.py | 48 ++++++++++++++----- .../tests/test_pashub_service.py | 45 ++++++++++------- 5 files changed, 87 insertions(+), 40 deletions(-) diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index a3ea70ca..e00acbe1 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -21,7 +21,7 @@ class FileTypeEnum(enum.Enum): IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation" MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan" RETROFIT_DESIGN_DOC = "retrofit_design_doc" - MCS_CERTIFICATE = "mcs_certificate" + MCS_COMPLIANCE_CERTIFICATE = "mcs_compliance_certificate" OTHER = "other" diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 00129954..c387e0b8 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -17,7 +17,7 @@ class CoreFiles(Enum): IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation" MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan" RETROFIT_DESIGN_DOC = "Retrofit Design Doc" - MCS_CERTIFICATE = "MCS Certificate" + MCS_COMPLIANCE_CERTIFICATE = "MCS Compliance Certificate" _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { @@ -33,7 +33,7 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, 
CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value, - CoreFiles.MCS_CERTIFICATE: FileTypeEnum.MCS_CERTIFICATE.value, + CoreFiles.MCS_COMPLIANCE_CERTIFICATE: FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value, } @@ -46,7 +46,7 @@ def get_core_file_type( evidence_category is not None and evidence_category.lower() == "mcs compliance certificate" ): - return CoreFiles.MCS_CERTIFICATE + return CoreFiles.MCS_COMPLIANCE_CERTIFICATE if evidence_category is not None and evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC @@ -64,7 +64,7 @@ def get_core_file_type( CoreFiles.RETROFIT_DESIGN_DOC, CoreFiles.IMPROVEMENT_OPTION_EVALUATION, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN, - CoreFiles.MCS_CERTIFICATE, + CoreFiles.MCS_COMPLIANCE_CERTIFICATE, } for core_file in CoreFiles: diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index c14a5a83..a2047ece 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -185,15 +185,17 @@ def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present assert result is None -def test_core_file_for_mcs_compliance_certificate_returns_mcs_certificate() -> None: +def test_core_file_for_mcs_compliance_certificate_returns_mcs_compliance_certificate() -> None: # Arrange filename = "MCS_cert_job123.pdf" # Act - result = get_core_file_type(filename, evidence_category="mcs compliance certificate") + result = get_core_file_type( + filename, evidence_category="mcs compliance certificate" + ) # Assert - assert result == CoreFiles.MCS_CERTIFICATE + assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE def test_core_file_for_mcs_compliance_certificate_is_case_insensitive() -> None: @@ -201,18 +203,24 @@ def test_core_file_for_mcs_compliance_certificate_is_case_insensitive() -> None: filename = "some_cert.pdf" # Act - result = get_core_file_type(filename, evidence_category="MCS 
Compliance Certificate") + result = get_core_file_type( + filename, evidence_category="MCS Compliance Certificate" + ) # Assert - assert result == CoreFiles.MCS_CERTIFICATE + assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE -def test_get_file_type_string_with_mcs_evidence_category_returns_mcs_certificate() -> None: +def test_get_file_type_string_with_mcs_evidence_category_returns_mcs_compliance_certificate() -> ( + None +): # Arrange filename = "some_cert.pdf" # Act - result = get_file_type_string(filename, evidence_category="MCS Compliance Certificate") + result = get_file_type_string( + filename, evidence_category="MCS Compliance Certificate" + ) # Assert - assert result == "mcs_certificate" + assert result == "mcs_compliance_certificate" diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 50c91b85..214a14a6 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -5,7 +5,11 @@ from unittest.mock import patch from backend.pashub_fetcher.core_files import CoreFiles from backend.pashub_fetcher.evidence_file_data import EvidenceFileData from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata -from backend.pashub_fetcher.pashub_client import DownloadedFile, DownloadedFiles, PashubClient +from backend.pashub_fetcher.pashub_client import ( + DownloadedFile, + DownloadedFiles, + PashubClient, +) def make_metadata() -> EvidenceMetadata: @@ -70,7 +74,10 @@ def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> result = client._group_into_core_and_other_files(files) # Assert - assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name + == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + ) def 
test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None: @@ -93,10 +100,15 @@ def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> No result = client._group_into_core_and_other_files(files) # Assert - assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name + == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + ) -def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> None: +def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> ( + None +): # Arrange client = make_client() files = [ @@ -116,7 +128,10 @@ def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_ result = client._group_into_core_and_other_files(files) # Assert - assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name + == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" + ) def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None: @@ -133,7 +148,7 @@ def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None: result = client._group_into_core_and_other_files(files) # Assert - assert CoreFiles.MCS_CERTIFICATE in result.core + assert CoreFiles.MCS_COMPLIANCE_CERTIFICATE in result.core assert result.other == [] @@ -157,10 +172,15 @@ def test_group_into_core_and_other_files_picks_most_recent_mcs_cert() -> None: result = client._group_into_core_and_other_files(files) # Assert - assert result.core[CoreFiles.MCS_CERTIFICATE].file_name == "mcs_cert_new.pdf" + assert ( + result.core[CoreFiles.MCS_COMPLIANCE_CERTIFICATE].file_name + == "mcs_cert_new.pdf" + ) -def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> None: +def 
test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> ( + None +): # Arrange client = make_client() files = [ @@ -180,7 +200,9 @@ def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candid result = client._group_into_core_and_other_files(files) # Assert - assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + ) # --------------------------------------------------------------------------- @@ -188,7 +210,9 @@ def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candid # --------------------------------------------------------------------------- -def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_when_include_other_false() -> None: +def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_when_include_other_false() -> ( + None +): # Arrange client = make_client() files = [ @@ -233,7 +257,9 @@ def test_get_evidence_files_by_job_id_core_files_carry_evidence_category() -> No assert result.core[0].evidence_category == "MCS Compliance Certificate" -def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> None: +def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> ( + None +): # Arrange client = make_client() files = [ diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index 522d4086..4ceeb832 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -17,7 +17,6 @@ from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( ) from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient - FAKE_JOB_LINK = "https://pashub.net/jobs/job-id-123/details" @@ -291,7 +290,9 @@ def 
test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: factory = MagicMock(return_value=coord_client) - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -317,7 +318,9 @@ def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: factory = MagicMock(return_value=coord_client) - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -350,13 +353,17 @@ def test_run_raises_unauthorized_when_both_clients_401() -> None: factory = MagicMock(return_value=coord_client) - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with pytest.raises(UnauthorizedError): service.run(make_request()) -def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> None: +def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> ( + None +): pas_client = MagicMock(spec=PashubClient) pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError() @@ -369,7 +376,9 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() factory = MagicMock(return_value=coord_client) fake_session = MagicMock() - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -384,7 +393,9 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() assert 
added[0].file_source == FileSourceEnum.COORDINATION_HUB.value -def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None: +def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> ( + None +): pas_client = MagicMock(spec=PashubClient) pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError() @@ -396,7 +407,9 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing( factory = MagicMock(return_value=coord_client) fake_session = MagicMock() - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -495,21 +508,21 @@ def test_run_persists_other_files_with_other_file_type() -> None: service.run(make_request(uprn="12345", get_other_files=True)) # Assert - all_added = [ - item - for c in fake_session.add_all.call_args_list - for item in c[0][0] - ] + all_added = [item for c in fake_session.add_all.call_args_list for item in c[0][0]] assert len(all_added) == 1 assert all_added[0].file_type == FileTypeEnum.OTHER.value -def test_run_persists_mcs_cert_with_mcs_certificate_file_type() -> None: +def test_run_persists_mcs_cert_with_mcs_compliance_certificate_file_type() -> None: # Arrange mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None mock_client.get_evidence_files_by_job_id.return_value = DownloadedFiles( - core=[DownloadedFile("/tmp/MCS_cert.pdf", "MCS Compliance Certificate", datetime(2024, 1, 1))], + core=[ + DownloadedFile( + "/tmp/MCS_cert.pdf", "MCS Compliance Certificate", datetime(2024, 1, 1) + ) + ], other=[], ) @@ -528,7 +541,7 @@ def test_run_persists_mcs_cert_with_mcs_certificate_file_type() -> None: # Assert fake_session.add_all.assert_called_once() added: list[Any] = fake_session.add_all.call_args[0][0] - assert 
added[0].file_type == FileTypeEnum.MCS_CERTIFICATE.value + assert added[0].file_type == FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value # --------------------------------------------------------------------------- From d31d0683f574fcd5b24fecfcfb1ea0e6a4f1c4b1 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 3 Jun 2026 14:23:08 +0000 Subject: [PATCH 31/36] get triggers working for abri address list --- .../trigger_local_lambda_from_file.py | 15 +++++++++------ .../trigger_pashub_sqs_from_file.py | 8 ++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/backend/pashub_fetcher/trigger_local_lambda_from_file.py b/backend/pashub_fetcher/trigger_local_lambda_from_file.py index fb9d1cbf..ad3097da 100644 --- a/backend/pashub_fetcher/trigger_local_lambda_from_file.py +++ b/backend/pashub_fetcher/trigger_local_lambda_from_file.py @@ -10,19 +10,19 @@ from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( ) from backend.pashub_fetcher.handler.handler import handler - if __name__ == "__main__": BASE_DIR = os.path.dirname(os.path.dirname(__file__)) filepath: str = os.path.join( BASE_DIR, "pashub_fetcher", - "The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx", + "local_run_02-06-2026", + "ECO_Approach_Coordination_Design_KN.xlsx", ) wb = load_workbook(filepath, data_only=True) - ws = wb["filtered_2"] + ws = wb["filtered"] - HEADER_ROW = 3 + HEADER_ROW = 1 headers: Dict[str, int] = {} for col in range(1, ws.max_column + 1): @@ -31,7 +31,7 @@ if __name__ == "__main__": headers[value.strip()] = col name_col = headers["Name"] - link_col = headers["PasHub Link"] + link_col = headers["PasHub ID"] hubspot_deal_id_col = headers["HubSpot ID"] trigger_requests: List[PashubToAraTriggerRequest] = [] @@ -50,7 +50,10 @@ if __name__ == "__main__": trigger_requests.append( PashubToAraTriggerRequest( - pashub_link=str(link), hubspot_deal_id=str(hubspot_deal_id) + pashub_link=str(link), + hubspot_deal_id=str(hubspot_deal_id), + 
address=str(name), + get_other_files=True, ) ) diff --git a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py index cddd2553..118c0aca 100644 --- a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py +++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py @@ -49,7 +49,7 @@ DEAL_ID_FILTER = None EXCEL_PATH: str = os.path.join( os.path.dirname(__file__), - "local_run_02-06-2026/NCHA WAVE 3 RAs.xlsx", + "local_run_02-06-2026/ECO_Approach_Coordination_Design_KN.xlsx", ) SHAREPOINT_PROPERTIES_FOLDER: str = "" @@ -65,9 +65,9 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: if header_val is not None: headers[str(header_val).strip()] = col - pashub_col: int = headers["PasHub link"] - record_id_col: int = headers["Record ID"] - deal_name_col: int = headers["Deal Name"] + pashub_col: int = headers["PasHub ID"] + record_id_col: int = headers["HubSpot ID"] + deal_name_col: int = headers["Name"] deal_stage_col: Optional[int] = headers.get("Deal Stage", None) requests: list[PashubToAraTriggerRequest] = [] From 2d9aa51929c32d94d0ae1d75765e9842dcd58d1a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 3 Jun 2026 14:54:54 +0000 Subject: [PATCH 32/36] don't include sharepoint link when triggering pashub fetcher from hubspot etl --- etl/hubspot/scripts/scraper/main.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 176e9b15..589e526b 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -67,7 +67,9 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}" ) - _trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id) + _trigger_magicplan_fetcher( + sqs_client, hubspot_deal, listing, hubspot_deal_id + ) else: # Deal already in 
db, check whether anything has changed logger.info( @@ -119,13 +121,18 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}" ) - _trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id) + _trigger_magicplan_fetcher( + sqs_client, hubspot_deal, listing, hubspot_deal_id + ) print("done") def _trigger_magicplan_fetcher( - sqs_client: Any, hubspot_deal: Dict[str, str], listing: Optional[dict[str, str]], hubspot_deal_id: str + sqs_client: Any, + hubspot_deal: Dict[str, str], + listing: Optional[dict[str, str]], + hubspot_deal_id: str, ) -> None: message_body = { "address": hubspot_deal.get("dealname"), @@ -136,9 +143,7 @@ def _trigger_magicplan_fetcher( QueueUrl=get_settings().MAGICPLAN_SQS_URL, MessageBody=json.dumps(message_body), ) - logger.info( - f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}" - ) + logger.info(f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}") def _trigger_pashub_fetcher( @@ -148,7 +153,7 @@ def _trigger_pashub_fetcher( "pashub_link": hubspot_deal["pashub_link"], "address": None, # potentially available from Listing, leave as None for now "hubspot_deal_id": deal_id, - "sharepoint_link": hubspot_deal.get("sharepoint_link", None), + # "sharepoint_link": hubspot_deal.get("sharepoint_link", None), # Don't send sharepoint link for now as they are inconsistent "uprn": hubspot_deal.get("national_uprn", None), "landlord_property_id": hubspot_deal.get("owner_property_id", None), "deal_stage": hubspot_deal.get("deal_stage", None), From eb72b0223f629e2e3499b887598fd72142fe282a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 4 Jun 2026 09:11:08 +0000 Subject: [PATCH 33/36] add ECO_SHAREPOINT_ID env var --- .github/workflows/_deploy_lambda.yml | 4 ++++ .github/workflows/deploy_terraform.yml | 1 + backend/app/config.py | 1 + backend/pashub_fetcher/trigger_pashub_sqs_from_file.py | 8 +++++--- 
deployment/terraform/lambda/pashub_to_ara/main.tf | 1 + deployment/terraform/lambda/pashub_to_ara/variables.tf | 5 +++++ utils/sharepoint/domna_sites.py | 1 + 7 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 70f9eabe..0035a579 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -76,6 +76,8 @@ on: required: false TF_VAR_social_housing_wave_3_sharepoint_id: required: false + TF_VAR_eco_sharepoint_id: + required: false TF_VAR_pashub_email: required: false TF_VAR_pashub_password: @@ -159,6 +161,7 @@ jobs: TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} + TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }} @@ -210,6 +213,7 @@ jobs: TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} + TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }} diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index fc999bc0..fd003c8c 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -448,6 +448,7 @@ jobs: 
TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.OSMOSIS_ACD_SHAREPOINT_ID }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.PRIVATE_PAY_SHAREPOINT_ID }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }} + TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }} TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }} TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }} TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }} diff --git a/backend/app/config.py b/backend/app/config.py index f969518d..1dc3daaf 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -82,6 +82,7 @@ class Settings(BaseSettings): OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None + ECO_SHAREPOINT_ID: Optional[str] = None OPENAI_API_KEY: Optional[str] = None # Pas Hub diff --git a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py index 118c0aca..fe3ec7d8 100644 --- a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py +++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py @@ -52,12 +52,14 @@ EXCEL_PATH: str = os.path.join( "local_run_02-06-2026/ECO_Approach_Coordination_Design_KN.xlsx", ) -SHAREPOINT_PROPERTIES_FOLDER: str = "" +SHAREPOINT_PROPERTIES_FOLDER: str = ( + "https://domnagroup.sharepoint.com/:f:/s/ECO/IgCCLUg3PJ-eS7BLDAwiNlelAW_WXLCb9wcjkigbLfHhPOY?e=DTbNNv" +) def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: wb = load_workbook(excel_path, data_only=True) - ws = wb.worksheets[0] + ws = wb.worksheets[1] headers: dict[str, int] = {} for col in range(1, ws.max_column + 1): @@ -123,7 +125,7 @@ def main() -> None: for request in trigger_requests: action: str = "DRY RUN" if DRY_RUN else "SENDING" logger.info( - f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link}" + f"[{action}] 
deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link} sharepoint_link={request.sharepoint_link}" ) if not DRY_RUN: diff --git a/deployment/terraform/lambda/pashub_to_ara/main.tf b/deployment/terraform/lambda/pashub_to_ara/main.tf index eba9c874..b5714055 100644 --- a/deployment/terraform/lambda/pashub_to_ara/main.tf +++ b/deployment/terraform/lambda/pashub_to_ara/main.tf @@ -47,6 +47,7 @@ module "lambda" { OSMOSIS_ACD_SHAREPOINT_ID = var.osmosis_acd_sharepoint_id PRIVATE_PAY_SHAREPOINT_ID = var.private_pay_sharepoint_id SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id + ECO_SHAREPOINT_ID = var.eco_sharepoint_id PASHUB_EMAIL = var.pashub_email PASHUB_PASSWORD = var.pashub_password PASHUB_COORDINATION_EMAIL = var.pashub_coordination_email diff --git a/deployment/terraform/lambda/pashub_to_ara/variables.tf b/deployment/terraform/lambda/pashub_to_ara/variables.tf index cdeff256..29b7af70 100644 --- a/deployment/terraform/lambda/pashub_to_ara/variables.tf +++ b/deployment/terraform/lambda/pashub_to_ara/variables.tf @@ -92,6 +92,11 @@ variable "social_housing_wave_3_sharepoint_id" { sensitive = true } +variable "eco_sharepoint_id" { + type = string + sensitive = true +} + variable "pashub_email" { type = string sensitive = true diff --git a/utils/sharepoint/domna_sites.py b/utils/sharepoint/domna_sites.py index e5efb82c..ce579af6 100644 --- a/utils/sharepoint/domna_sites.py +++ b/utils/sharepoint/domna_sites.py @@ -9,3 +9,4 @@ class DomnaSites(Enum): OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID") PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID") SOCIAL_HOUSING_WAVE_3 = os.getenv("SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID") + ECO = os.getenv("ECO_SHAREPOINT_ID") From 416dac0512b13146975c04af1e30476670b8737f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 4 Jun 2026 09:29:24 +0000 Subject: [PATCH 34/36] =?UTF-8?q?SharePoint=20upload=20is=20skipped=20when?= =?UTF-8?q?=20client=20is=20None=20=F0=9F=9F=A5?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_pashub_service.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index 4ceeb832..ccb80ac4 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -616,6 +616,33 @@ def test_sharepoint_skips_upload_when_folder_not_found() -> None: mock_logger.warning.assert_called() +def test_sharepoint_skips_upload_when_sharepoint_client_is_none() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"] + ) + + service = PashubService( + pashub_client=mock_client, + sharepoint_client=None, + s3_bucket="test-bucket", + ) + + # Act — should not raise AttributeError on None._sharepoint_client + with patch("backend.pashub_fetcher.pashub_service.os.remove"): + result = service.run( + make_request( + sharepoint_link="Retrofit/Properties", + address="123 Main St | deal", + ) + ) + + # Assert + assert result == ["/tmp/core.pdf"] + + def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None From db2cbc9c574ea2a77008d44ac8b2842c0b58c096 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 4 Jun 2026 09:33:07 +0000 Subject: [PATCH 35/36] =?UTF-8?q?SharePoint=20upload=20is=20skipped=20when?= =?UTF-8?q?=20client=20is=20None=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/handler/handler.py | 14 ++++++++++---- backend/pashub_fetcher/pashub_service.py | 5 +++-- .../pashub_to_ara_trigger_request.py | 2 ++ .../pashub_fetcher/trigger_pashub_sqs_from_file.py | 5 
++++- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 626ce59d..00f0ddea 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -40,10 +40,6 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]: if (not pashub_email) or (not pashub_password): raise ValueError("Pas Hub credentials not provided") - sharepoint_client = DomnaSharepointClient( - sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3 - ) - if coordination_hub_email and coordination_hub_password: _coord_email, _coord_password = ( coordination_hub_email, @@ -57,6 +53,16 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]: payload = PashubToAraTriggerRequest.model_validate(body) logger.debug("Successfully validated request body") + sharepoint_client: Optional[DomnaSharepointClient] = None + if payload.sharepoint_site is not None: + try: + resolved_site = DomnaSites[payload.sharepoint_site] + sharepoint_client = DomnaSharepointClient(sharepoint_location=resolved_site) + except KeyError: + logger.warning( + f"Unrecognised sharepoint_site '{payload.sharepoint_site}'; skipping SharePoint upload" + ) + service = PashubService( pashub_client=get_pashub_client(pashub_email, pashub_password), sharepoint_client=sharepoint_client, diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index 2b7a7f03..86a553f0 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -38,7 +38,7 @@ class PashubService: def __init__( self, pashub_client: PashubClient, - sharepoint_client: DomnaSharepointClient, + sharepoint_client: Optional[DomnaSharepointClient], s3_bucket: str, coordination_client_factory: Optional[Callable[[], PashubClient]] = None, ) -> None: @@ -111,7 +111,7 @@ class PashubService: default_file_type=FileTypeEnum.OTHER.value, ) - if 
request.sharepoint_link and request.address: + if self._sharepoint_client and request.sharepoint_link and request.address: folder_name = request.address.split("|")[0].strip() folders = self._sharepoint_client.get_folders_in_path(request.sharepoint_link) match = next( @@ -207,6 +207,7 @@ class PashubService: property_folder_path: str, files: List[DownloadedFile], ) -> None: + assert self._sharepoint_client is not None for df in files: filename = os.path.basename(df.file_path) try: diff --git a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py index 5f6ce37d..d747a388 100644 --- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py +++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py @@ -14,6 +14,8 @@ class PashubToAraTriggerRequest(BaseModel): hubspot_listing_id: Optional[int] = None hubspot_deal_id: Optional[str] = None + sharepoint_site: Optional[str] = None + get_other_files: bool = False @property diff --git a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py index fe3ec7d8..d6736eda 100644 --- a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py +++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py @@ -53,9 +53,11 @@ EXCEL_PATH: str = os.path.join( ) SHAREPOINT_PROPERTIES_FOLDER: str = ( - "https://domnagroup.sharepoint.com/:f:/s/ECO/IgCCLUg3PJ-eS7BLDAwiNlelAW_WXLCb9wcjkigbLfHhPOY?e=DTbNNv" + "Housing Associations/- Client Shared Folders/Abri/Abri Property Folders (Full PAS Info)" ) +SHAREPOINT_SITE: str = "ECO" + def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: wb = load_workbook(excel_path, data_only=True) @@ -104,6 +106,7 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: address=address, deal_stage=deal_stage, sharepoint_link=SHAREPOINT_PROPERTIES_FOLDER or None, + sharepoint_site=SHAREPOINT_SITE, ) ) From ea4c58bef7d472e0cdaf8c107a0583c9baf5f729 Mon Sep 17 
00:00:00 2001 From: Daniel Roth Date: Thu, 4 Jun 2026 10:15:58 +0000 Subject: [PATCH 36/36] terraform correction --- .github/workflows/deploy_terraform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index fd003c8c..920c9ab0 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -448,7 +448,7 @@ jobs: TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.OSMOSIS_ACD_SHAREPOINT_ID }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.PRIVATE_PAY_SHAREPOINT_ID }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }} - TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }} + TF_VAR_eco_sharepoint_id: ${{ secrets.ECO_SHAREPOINT_ID }} TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }} TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }} TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }}