From 8b6f67b3572e34752074a44010aeb5c6fdeed747 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:51:53 +0000 Subject: [PATCH] =?UTF-8?q?Wire=20service=20to=20`get=5Fevidence=5Ffiles?= =?UTF-8?q?=5Fby=5Fjob=5Fid`;=20retire=20`get=5Fcore=5Fevidence=5Ffiles=5F?= =?UTF-8?q?by=5Fjob=5Fid`=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 37 --------- backend/pashub_fetcher/pashub_service.py | 25 ++++-- .../pashub_to_ara_trigger_request.py | 2 +- .../tests/test_pashub_service.py | 78 ++++++++++++------- 4 files changed, 66 insertions(+), 76 deletions(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index da44cf48..c3f1caff 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -43,43 +43,6 @@ class PashubClient: ) logger.info("Finished initialising CotalityClient") - def get_core_evidence_files_by_job_id(self, job_id: str) -> List[str]: - logger.info(f"Getting Core Evidence Files for job ID {job_id}") - evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id) - logger.info(f"Found {len(evidence_list)} Evidence files to get") - if not evidence_list: - return [] - - saved_files: List[str] = [] - - core_files: Dict[CoreFiles, EvidenceFileData] = self._group_into_core_and_other_files( - evidence_list - ).core - - logger.info(f"Number of core files to download is {len(core_files)}") - - for _, evidence in core_files.items(): - evidence_id = evidence.file_id - if not evidence_id: - continue - - logger.info(f"Getting metadata for file {evidence.file_name}") - metadata: EvidenceMetadata = self._get_evidence_metadata( - job_id, evidence_id - ) - - download_url: str = self._build_download_url(metadata, evidence.file_id) - output_dir: str = "/tmp" - - file_name: str = evidence.file_name - file_path: str = os.path.join(output_dir, file_name) - - self._download_file(download_url, file_path) - logger.info("Successfully downloaded file") - saved_files.append(file_path) - - return saved_files - def get_evidence_files_by_job_id( self, job_id: str, include_other: bool = False ) -> DownloadedFiles: diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index 6138abe9..c9fce806 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -11,7 +11,11 @@ from backend.app.db.models.uploaded_file import ( from backend.documents_parser.db_writer import save_epc_property_data from backend.documents_parser.parser import parse_site_notes_pdf from backend.pashub_fetcher.core_files import get_file_type_string -from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError +from backend.pashub_fetcher.pashub_client import ( + DownloadedFiles, + PashubClient, + UnauthorizedError, +) from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, ) @@ -75,14 +79,16 @@ class PashubService: logger.info(f"No UPRN found for job {job_id}") try: - core_files: List[str] = active_client.get_core_evidence_files_by_job_id( - job_id + downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id( + job_id, include_other=request.get_other_files ) except UnauthorizedError: if active_client is not self._pashub_client: raise active_client = self._get_coordination_client() - core_files = active_client.get_core_evidence_files_by_job_id(job_id) + downloaded = active_client.get_evidence_files_by_job_id( + job_id, include_other=request.get_other_files + ) if uprn or hubspot_deal_id: logger.info("Uploading files to s3") @@ -92,22 +98,25 @@ class PashubService: else FileSourceEnum.COORDINATION_HUB ) upload_records = self._upload_to_s3_and_update_db( - core_files, uprn, hubspot_deal_id, file_source + downloaded.core, uprn, hubspot_deal_id, file_source ) self._save_site_notes(upload_records) # SharePoint upload disabled: pashub sharepoint_link is inconsistent # (points to property or project unpredictably) # if request.sharepoint_link: - # self._upload_to_sharepoint(request.sharepoint_link, job_files) + # self._upload_to_sharepoint(request.sharepoint_link, downloaded.core) - for file_path in core_files: + if request.get_other_files: + pass # TODO: process downloaded.other + + for file_path in downloaded.core: try: os.remove(file_path) except OSError: logger.warning(f"Failed to delete temp file {file_path}") - return core_files + return downloaded.core def _upload_to_s3_and_update_db( self, diff --git a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py index 7fb00508..5f6ce37d 100644 --- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py +++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py @@ -14,7 +14,7 @@ class PashubToAraTriggerRequest(BaseModel): hubspot_listing_id: Optional[int] = None hubspot_deal_id: Optional[str] = None - get_other_files: Optional[bool] = False + get_other_files: bool = False @property def pashub_job_id(self) -> str: diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index 1f750117..7ec8dea2 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -4,7 +4,11 @@ from unittest.mock import MagicMock, call, patch from backend.app.db.models.uploaded_file import FileSourceEnum -from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError +from backend.pashub_fetcher.pashub_client import ( + DownloadedFiles, + PashubClient, + UnauthorizedError, +) from backend.pashub_fetcher.pashub_service import PashubService from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, @@ -20,12 +24,14 @@ def make_request( uprn: Optional[str] = None, hubspot_deal_id: Optional[str] = None, sharepoint_link: Optional[str] = None, + get_other_files: bool = False, ) -> PashubToAraTriggerRequest: return PashubToAraTriggerRequest( pashub_link=pashub_link, uprn=uprn, hubspot_deal_id=hubspot_deal_id, sharepoint_link=sharepoint_link, + get_other_files=get_other_files, ) @@ -43,6 +49,10 @@ def make_service( ) +def make_downloaded(core: list[str], other: list[str] = []) -> DownloadedFiles: + return DownloadedFiles(core=core, other=other) + + # --------------------------------------------------------------------------- # run(): returns file paths # --------------------------------------------------------------------------- @@ -51,10 +61,9 @@ def make_service( def test_run_returns_file_paths() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/a.pdf", - "/tmp/b.pdf", - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf", "/tmp/b.pdf"] + ) service = make_service(pashub_client=mock_client) @@ -72,7 +81,9 @@ def test_run_returns_file_paths() -> None: def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) service = make_service(pashub_client=mock_client) @@ -93,10 +104,9 @@ def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None: def test_run_uploads_files_to_s3_using_uprn_path() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf", - "/tmp/Photopack_002.pdf", - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf", "/tmp/Photopack_002.pdf"] + ) service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") @@ -132,9 +142,9 @@ def test_run_uploads_files_to_s3_using_uprn_path() -> None: def test_run_persists_uploaded_file_records_to_db() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"] + ) fake_session = MagicMock() service = make_service(pashub_client=mock_client) @@ -163,9 +173,9 @@ def test_run_persists_uploaded_file_records_to_db() -> None: def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"] + ) service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") @@ -191,9 +201,9 @@ def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None: def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/RdSAP_SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/RdSAP_SiteNote_001.pdf"] + ) fake_epc_data = MagicMock() fake_session = MagicMock() @@ -241,7 +251,9 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: coord_client = MagicMock(spec=PashubClient) coord_client.get_uprn_by_job_id.return_value = "99999" - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) @@ -256,16 +268,18 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: assert result == ["/tmp/a.pdf"] coord_client.get_uprn_by_job_id.assert_called_once() - coord_client.get_core_evidence_files_by_job_id.assert_called_once() + coord_client.get_evidence_files_by_job_id.assert_called_once() assert factory.call_count == 1 def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: pas_client = MagicMock(spec=PashubClient) - pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError() + pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) @@ -279,7 +293,7 @@ def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: result = service.run(make_request(uprn="12345")) assert result == ["/tmp/a.pdf"] - coord_client.get_core_evidence_files_by_job_id.assert_called_once() + coord_client.get_evidence_files_by_job_id.assert_called_once() pas_client.get_uprn_by_job_id.assert_not_called() @@ -314,7 +328,9 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() coord_client = MagicMock(spec=PashubClient) coord_client.get_uprn_by_job_id.return_value = "99999" - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) fake_session = MagicMock() @@ -336,10 +352,12 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None: pas_client = MagicMock(spec=PashubClient) - pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError() + pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) fake_session = MagicMock() @@ -362,9 +380,9 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing( def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/RdSAP_SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/RdSAP_SiteNote_001.pdf"] + ) service = make_service(pashub_client=mock_client)