import pytest
from datetime import datetime
from typing import Any, Callable, Optional
from unittest.mock import MagicMock, call, patch

from infrastructure.postgres.uploaded_file_table import FileSourceEnum, FileTypeEnum
from backend.pashub_fetcher.pashub_client import (
    DownloadedFile,
    DownloadedFiles,
    PashubClient,
    UnauthorizedError,
)
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
    PashubToAraTriggerRequest,
)
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient

# Job link used by every request unless a test overrides ``pashub_link``.
FAKE_JOB_LINK = "https://pashub.net/jobs/job-id-123/details"


def make_request(
    pashub_link: str = FAKE_JOB_LINK,
    uprn: Optional[str] = None,
    hubspot_deal_id: Optional[str] = None,
    sharepoint_link: Optional[str] = None,
    get_other_files: bool = False,
    address: Optional[str] = None,
) -> PashubToAraTriggerRequest:
    """Build a trigger request, defaulting every optional field to "absent".

    Each argument is forwarded unchanged to ``PashubToAraTriggerRequest`` under
    the same keyword, so a test only has to spell out the fields it cares about.
    """
    return PashubToAraTriggerRequest(
        pashub_link=pashub_link,
        uprn=uprn,
        hubspot_deal_id=hubspot_deal_id,
        sharepoint_link=sharepoint_link,
        get_other_files=get_other_files,
        address=address,
    )


def make_service(
    pashub_client: Optional[PashubClient] = None,
    sharepoint_client: Optional[DomnaSharepointClient] = None,
    s3_bucket: str = "test-bucket",
    coordination_client_factory: Optional[Callable[[], PashubClient]] = None,
) -> PashubService:
    """Build a ``PashubService`` with spec'd mocks for any client not supplied.

    Args:
        pashub_client: Client to inject; a ``MagicMock(spec=PashubClient)`` is
            used when omitted.
        sharepoint_client: Client to inject; a
            ``MagicMock(spec=DomnaSharepointClient)`` is used when omitted. To
            construct a service with *no* SharePoint client, instantiate
            ``PashubService`` directly, because ``None`` here means "use a mock".
        s3_bucket: Bucket name handed to the service.
        coordination_client_factory: Passed through as-is (``None`` by default).
    """
    return PashubService(
        pashub_client=pashub_client or MagicMock(spec=PashubClient),
        sharepoint_client=sharepoint_client or MagicMock(spec=DomnaSharepointClient),
        s3_bucket=s3_bucket,
        coordination_client_factory=coordination_client_factory,
    )


# Timestamp stamped on every DownloadedFile built by make_downloaded().
# NOTE(review): the value is a naive datetime even though the name says UTC.
_DEFAULT_UTC = datetime(2024, 1, 1)


def make_downloaded(
    core: list[str], other: Optional[list[str]] = None
) -> DownloadedFiles:
    """Wrap plain file paths in a ``DownloadedFiles`` result.

    Args:
        core: Paths placed in ``DownloadedFiles.core``.
        other: Paths placed in ``DownloadedFiles.other``; omitted or ``None``
            yields an empty list.

    Every ``DownloadedFile`` is built with ``None`` as its second positional
    field and ``_DEFAULT_UTC`` as its third.
    """
    # ``None`` sentinel instead of a ``[]`` default: a list default is created
    # once at definition time and shared by every call.
    other_paths = [] if other is None else other
    return DownloadedFiles(
        core=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in core],
        other=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in other_paths],
    )


# ---------------------------------------------------------------------------
# run(): returns file paths
# ---------------------------------------------------------------------------


def test_run_returns_file_paths() -> None:
    """run() returns the paths of the downloaded core files, in order."""
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/a.pdf", "/tmp/b.pdf"]
    )
    service = make_service(pashub_client=mock_client)

    with patch("backend.pashub_fetcher.pashub_service.os.remove"):
        result = service.run(make_request())

    assert result == ["/tmp/a.pdf", "/tmp/b.pdf"]


# ---------------------------------------------------------------------------
# run(): returns core + other file paths when get_other_files=True
# ---------------------------------------------------------------------------


def test_run_returns_core_and_other_file_paths() -> None:
    """With get_other_files=True the result lists core paths, then other paths."""
    # Arrange
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/core.pdf"],
        other=["/tmp/other.pdf"],
    )
    service = make_service(pashub_client=mock_client)

    # Act
    with patch("backend.pashub_fetcher.pashub_service.os.remove"):
        result = service.run(make_request(get_other_files=True))

    # Assert
    assert result == ["/tmp/core.pdf", "/tmp/other.pdf"]


# ---------------------------------------------------------------------------
# run(): skips upload when neither uprn nor hubspot_deal_id
# ---------------------------------------------------------------------------


def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None:
    """Without a UPRN or a HubSpot deal id, upload_file_to_s3 is never called."""
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/a.pdf"]
    )
    service = make_service(pashub_client=mock_client)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as mock_s3,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(make_request(uprn=None, hubspot_deal_id=None))

    mock_s3.assert_not_called()


# ---------------------------------------------------------------------------
# run(): UPRN present → uploads each file to S3 with correct bucket/key
# ---------------------------------------------------------------------------


def test_run_uploads_files_to_s3_using_uprn_path() -> None:
    """Each core file is uploaded under documents/uprn/<uprn>/<basename>, in order."""
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/SiteNote_001.pdf", "/tmp/Photopack_002.pdf"]
    )
    service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as mock_s3,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(make_request(uprn="12345"))

    mock_s3.assert_has_calls(
        [
            call(
                "/tmp/SiteNote_001.pdf",
                "my-bucket",
                "documents/uprn/12345/SiteNote_001.pdf",
            ),
            call(
                "/tmp/Photopack_002.pdf",
                "my-bucket",
                "documents/uprn/12345/Photopack_002.pdf",
            ),
        ],
        any_order=False,
    )


# ---------------------------------------------------------------------------
# run(): UPRN present → UploadedFile records added to DB session
# ---------------------------------------------------------------------------


def test_run_persists_uploaded_file_records_to_db() -> None:
    """One record per file is passed to session.add_all with bucket, UPRN and source."""
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/SiteNote_001.pdf"]
    )
    fake_session = MagicMock()
    service = make_service(pashub_client=mock_client)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        # db_session() is used as a context manager; hand back our fake session.
        mock_db.return_value.__enter__.return_value = fake_session
        service.run(make_request(uprn="12345"))

    fake_session.add_all.assert_called_once()
    added: list[Any] = fake_session.add_all.call_args[0][0]
    assert len(added) == 1
    assert added[0].s3_file_bucket == "test-bucket"
    # The request carries the UPRN as a string; the record stores it as an int.
    assert added[0].uprn == 12345
    assert added[0].file_source == FileSourceEnum.PAS_HUB.value


# ---------------------------------------------------------------------------
# run(): hubspot_deal_id only → uses deal_id S3 path prefix
# ---------------------------------------------------------------------------


def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None:
    """With only a deal id, the S3 key is documents/hubspot_deal_id/<id>/<basename>."""
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/SiteNote_001.pdf"]
    )
    service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as mock_s3,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(make_request(uprn=None, hubspot_deal_id="deal-abc"))

    mock_s3.assert_called_once_with(
        "/tmp/SiteNote_001.pdf",
        "my-bucket",
        "documents/hubspot_deal_id/deal-abc/SiteNote_001.pdf",
    )


# ---------------------------------------------------------------------------
# run(): RD_SAP_SITE_NOTE file → site notes parsed and saved to DB
# ---------------------------------------------------------------------------


def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None:
    """An RdSAP site-note PDF is parsed and saved against the uploaded file's id."""
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/RdSAP_SiteNote_001.pdf"]
    )
    fake_epc_data = MagicMock()
    fake_session = MagicMock()
    fake_uploaded_file_id = 99
    service = make_service(pashub_client=mock_client)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch(
            "backend.pashub_fetcher.pashub_service.parse_site_notes_pdf",
            return_value=fake_epc_data,
        ) as mock_parse,
        patch(
            "backend.pashub_fetcher.pashub_service.save_epc_property_data"
        ) as mock_save,
        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        # Give the first added record an id when add_all() is called, so the
        # save call below can be checked against a known uploaded_file_id.
        fake_session.add_all = MagicMock(
            side_effect=lambda files: setattr(files[0], "id", fake_uploaded_file_id)
        )
        mock_db.return_value.__enter__.return_value = fake_session
        service.run(make_request(uprn="12345"))

    mock_parse.assert_called_once_with("/tmp/RdSAP_SiteNote_001.pdf")
    mock_save.assert_called_once_with(
        fake_session, fake_epc_data, uploaded_file_id=fake_uploaded_file_id
    )


# ---------------------------------------------------------------------------
# run(): site notes parse failure → warning logged, run returns normally
# ---------------------------------------------------------------------------


def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None:
    """A ValueError from the site-notes parser is logged as a warning, not raised."""
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/RdSAP_SiteNote_001.pdf"]
    )
    service = make_service(pashub_client=mock_client)

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch(
            "backend.pashub_fetcher.pashub_service.parse_site_notes_pdf",
            side_effect=ValueError("corrupt pdf"),
        ),
        patch(
            "backend.pashub_fetcher.pashub_service.save_epc_property_data"
        ) as mock_save,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.logger") as mock_logger,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        result = service.run(make_request(uprn="12345"))

    assert result == ["/tmp/RdSAP_SiteNote_001.pdf"]
    mock_logger.warning.assert_called()
    mock_save.assert_not_called()


# ---------------------------------------------------------------------------
# run(): coordination fallback
# ---------------------------------------------------------------------------


def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
    """A 401 on the UPRN lookup switches the run to the factory-built client."""
    pas_client = MagicMock(spec=PashubClient)
    pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()

    coord_client = MagicMock(spec=PashubClient)
    coord_client.get_uprn_by_job_id.return_value = "99999"
    coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/a.pdf"]
    )
    factory = MagicMock(return_value=coord_client)

    service = make_service(
        pashub_client=pas_client, coordination_client_factory=factory
    )

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        result = service.run(make_request())

    assert result == ["/tmp/a.pdf"]
    coord_client.get_uprn_by_job_id.assert_called_once()
    coord_client.get_evidence_files_by_job_id.assert_called_once()
    # The fallback client is built exactly once.
    assert factory.call_count == 1


def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
    """A 401 on the file listing falls back to the coordination client."""
    pas_client = MagicMock(spec=PashubClient)
    pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError()

    coord_client = MagicMock(spec=PashubClient)
    coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/a.pdf"]
    )
    factory = MagicMock(return_value=coord_client)

    service = make_service(
        pashub_client=pas_client, coordination_client_factory=factory
    )

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        result = service.run(make_request(uprn="12345"))

    assert result == ["/tmp/a.pdf"]
    coord_client.get_evidence_files_by_job_id.assert_called_once()
    # The request already carries a UPRN, so no lookup is made.
    pas_client.get_uprn_by_job_id.assert_not_called()


def test_run_raises_unauthorized_when_pas_401_and_no_factory() -> None:
    """Without a coordination factory, the 401 propagates to the caller."""
    pas_client = MagicMock(spec=PashubClient)
    pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()

    service = make_service(pashub_client=pas_client)

    with pytest.raises(UnauthorizedError):
        service.run(make_request())


def test_run_raises_unauthorized_when_both_clients_401() -> None:
    """If the fallback client is also unauthorized, the 401 propagates."""
    pas_client = MagicMock(spec=PashubClient)
    pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()

    coord_client = MagicMock(spec=PashubClient)
    coord_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
    factory = MagicMock(return_value=coord_client)

    service = make_service(
        pashub_client=pas_client, coordination_client_factory=factory
    )

    with pytest.raises(UnauthorizedError):
        service.run(make_request())


def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> (
    None
):
    """After falling back on the UPRN lookup, records use COORDINATION_HUB source."""
    pas_client = MagicMock(spec=PashubClient)
    pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()

    coord_client = MagicMock(spec=PashubClient)
    coord_client.get_uprn_by_job_id.return_value = "99999"
    coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/a.pdf"]
    )
    factory = MagicMock(return_value=coord_client)
    fake_session = MagicMock()

    service = make_service(
        pashub_client=pas_client, coordination_client_factory=factory
    )

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        mock_db.return_value.__enter__.return_value = fake_session
        service.run(make_request())

    fake_session.add_all.assert_called_once()
    added: list[Any] = fake_session.add_all.call_args[0][0]
    assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value


def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> (
    None
):
    """After falling back on the file listing, records use COORDINATION_HUB source."""
    pas_client = MagicMock(spec=PashubClient)
    pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError()

    coord_client = MagicMock(spec=PashubClient)
    coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/a.pdf"]
    )
    factory = MagicMock(return_value=coord_client)
    fake_session = MagicMock()

    service = make_service(
        pashub_client=pas_client, coordination_client_factory=factory
    )

    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        mock_db.return_value.__enter__.return_value = fake_session
        service.run(make_request(uprn="12345"))

    fake_session.add_all.assert_called_once()
    added: list[Any] = fake_session.add_all.call_args[0][0]
    assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value


# ---------------------------------------------------------------------------
# run(): get_other_files=True → other temp files deleted after run
# ---------------------------------------------------------------------------


def test_run_deletes_other_temp_files_when_get_other_files_true() -> None:
    """os.remove is called for both the core and the other temp file."""
    # Arrange
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/core.pdf"],
        other=["/tmp/other.pdf"],
    )
    service = make_service(pashub_client=mock_client)

    # Act
    with patch("backend.pashub_fetcher.pashub_service.os.remove") as mock_remove:
        service.run(make_request(get_other_files=True))

    # Assert
    mock_remove.assert_any_call("/tmp/core.pdf")
    mock_remove.assert_any_call("/tmp/other.pdf")


# ---------------------------------------------------------------------------
# run(): get_other_files=True → other files uploaded to S3
# ---------------------------------------------------------------------------


def test_run_uploads_other_files_to_s3_when_get_other_files_true() -> None:
    """Other files are uploaded under the same documents/uprn/<uprn>/ prefix."""
    # Arrange
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/SiteNote_001.pdf"],
        other=["/tmp/unknown_file.pdf"],
    )
    service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")

    # Act
    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as mock_s3,
        patch("backend.pashub_fetcher.pashub_service.db_session"),
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        service.run(make_request(uprn="12345", get_other_files=True))

    # Assert
    mock_s3.assert_any_call(
        "/tmp/unknown_file.pdf",
        "my-bucket",
        "documents/uprn/12345/unknown_file.pdf",
    )


# ---------------------------------------------------------------------------
# run(): get_other_files=True → other files persisted with file_type OTHER
# ---------------------------------------------------------------------------


def test_run_persists_other_files_with_other_file_type() -> None:
    """A file from the "other" list is persisted with file_type OTHER."""
    # Arrange
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=[],
        other=["/tmp/unknown_file.pdf"],
    )
    fake_session = MagicMock()
    service = make_service(pashub_client=mock_client)

    # Act
    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        mock_db.return_value.__enter__.return_value = fake_session
        service.run(make_request(uprn="12345", get_other_files=True))

    # Assert
    # Flatten across every add_all() call so the check does not depend on how
    # many times the service calls it.
    all_added = [item for c in fake_session.add_all.call_args_list for item in c[0][0]]
    assert len(all_added) == 1
    assert all_added[0].file_type == FileTypeEnum.OTHER.value


def test_run_persists_mcs_cert_with_mcs_compliance_certificate_file_type() -> None:
    """A file labelled "MCS Compliance Certificate" gets the matching file_type."""
    # Arrange
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    # Built by hand because make_downloaded() always passes None as the second
    # DownloadedFile field, and this test needs a label there.
    mock_client.get_evidence_files_by_job_id.return_value = DownloadedFiles(
        core=[
            DownloadedFile(
                "/tmp/MCS_cert.pdf", "MCS Compliance Certificate", _DEFAULT_UTC
            )
        ],
        other=[],
    )
    fake_session = MagicMock()
    service = make_service(pashub_client=mock_client)

    # Act
    with (
        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
    ):
        mock_db.return_value.__enter__.return_value = fake_session
        service.run(make_request(uprn="12345"))

    # Assert
    fake_session.add_all.assert_called_once()
    added: list[Any] = fake_session.add_all.call_args[0][0]
    assert added[0].file_type == FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value


# ---------------------------------------------------------------------------
# run(): SharePoint upload
# ---------------------------------------------------------------------------


def test_sharepoint_uploads_all_files_to_property_folder() -> None:
    """Core and other files are uploaded into the matching property folder."""
    # Arrange
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/core.pdf"],
        other=["/tmp/other.pdf"],
    )
    mock_sharepoint = MagicMock(spec=DomnaSharepointClient)
    mock_sharepoint.get_folders_in_path.return_value = {
        "value": [{"name": "123 Main St"}]
    }
    service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint)

    # Act
    with patch("backend.pashub_fetcher.pashub_service.os.remove"):
        service.run(
            make_request(
                sharepoint_link="Retrofit/Properties",
                get_other_files=True,
                address="123 Main St | some deal",
            )
        )

    # Assert
    mock_sharepoint.upload_file.assert_any_call(
        "/tmp/core.pdf", "Retrofit/Properties/123 Main St", "core.pdf"
    )
    mock_sharepoint.upload_file.assert_any_call(
        "/tmp/other.pdf", "Retrofit/Properties/123 Main St", "other.pdf"
    )


def test_sharepoint_skips_upload_when_folder_not_found() -> None:
    """When no listed folder matches the address, nothing is uploaded and a warning is logged."""
    # Arrange
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/core.pdf"]
    )
    mock_sharepoint = MagicMock(spec=DomnaSharepointClient)
    mock_sharepoint.get_folders_in_path.return_value = {
        "value": [{"name": "Different Property"}]
    }
    service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint)

    # Act
    with (
        patch("backend.pashub_fetcher.pashub_service.os.remove"),
        patch("backend.pashub_fetcher.pashub_service.logger") as mock_logger,
    ):
        service.run(
            make_request(
                sharepoint_link="Retrofit/Properties",
                address="No Such Property | deal",
            )
        )

    # Assert
    mock_sharepoint.upload_file.assert_not_called()
    mock_logger.warning.assert_called()


def test_sharepoint_skips_upload_when_sharepoint_client_is_none() -> None:
    """A service built without a SharePoint client still completes the run."""
    # Arrange
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
    mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
        core=["/tmp/core.pdf"]
    )
    # Constructed directly: make_service() would replace None with a mock.
    service = PashubService(
        pashub_client=mock_client,
        sharepoint_client=None,
        s3_bucket="test-bucket",
    )

    # Act — should not raise AttributeError on None._sharepoint_client
    with patch("backend.pashub_fetcher.pashub_service.os.remove"):
        result = service.run(
            make_request(
                sharepoint_link="Retrofit/Properties",
                address="123 Main St | deal",
            )
        )

    # Assert
    assert result == ["/tmp/core.pdf"]