# pyright: reportPrivateUsage=false
"""Tests for PashubClient file grouping and evidence download entry points."""

from typing import Optional
from unittest.mock import patch

from backend.pashub_fetcher.core_files import CoreFiles
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
from backend.pashub_fetcher.pashub_client import (
    DownloadedFile,
    DownloadedFiles,
    PashubClient,
)

# Literals shared by several tests; values are the exact strings the tests
# feed to, and expect back from, the client.
_OSM_LORD_NELSON_PDF = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
_OSM_ALVASTON_PDF = "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
_RETROFIT_DESIGN = "retrofit design"
_MCS_CERTIFICATE = "MCS Compliance Certificate"
_JAN_2024 = "2024-01-01T00:00:00"
_JUN_2024 = "2024-06-01T00:00:00"


def make_metadata() -> EvidenceMetadata:
    """Build an EvidenceMetadata with a fixed container name and blob URI."""
    return EvidenceMetadata(
        container_name="my-container",
        blob_uri="https://storage.example.com/blob?sas=token",
    )


def make_client() -> PashubClient:
    """Build a PashubClient configured with a dummy token."""
    return PashubClient(token="test-token")


def make_file(
    file_name: str = "unknown.pdf",
    evidence_category: Optional[str] = None,
    created_utc: str = _JAN_2024,
) -> EvidenceFileData:
    """Build an EvidenceFileData; only name, category and timestamp vary.

    The id ("id-1"), size (1024) and extension ("pdf") are the same for every
    file produced by this factory.
    """
    return EvidenceFileData(
        file_id="id-1",
        file_name=file_name,
        created_utc=created_utc,
        file_size=1024,
        file_extension="pdf",
        evidence_category=evidence_category,
    )


def _fetch_with_stubbed_io(
    client: PashubClient,
    files: list[EvidenceFileData],
    *,
    include_other: bool,
) -> DownloadedFiles:
    """Call get_evidence_files_by_job_id for "job-1" with client I/O patched.

    `_get_evidence_list` is patched to return `files`, `_get_evidence_metadata`
    to return `make_metadata()`, and `_download_file` is replaced by a mock.
    """
    with (
        patch.object(client, "_get_evidence_list", return_value=files),
        patch.object(client, "_get_evidence_metadata", return_value=make_metadata()),
        patch.object(client, "_download_file"),
    ):
        return client.get_evidence_files_by_job_id(
            "job-1", include_other=include_other
        )


# ---------------------------------------------------------------------------
# _group_into_core_and_other_files
# ---------------------------------------------------------------------------


def test_group_into_core_and_other_files_classifies_core_and_other_correctly() -> None:
    """A site note lands in `core`; the other document lands in `other`."""
    # Arrange
    pashub = make_client()
    evidence = [
        make_file(file_name="SiteNote_001.pdf"),
        make_file(file_name="some_unknown_document.pdf"),
    ]

    # Act
    grouped = pashub._group_into_core_and_other_files(evidence)

    # Assert
    assert CoreFiles.SITENOTE in grouped.core
    other_names = [item.file_name for item in grouped.other]
    assert other_names == ["some_unknown_document.pdf"]


def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> None:
    """A lone retrofit-design file is returned as the retrofit design doc."""
    # Arrange
    pashub = make_client()
    evidence = [
        make_file(
            file_name=_OSM_LORD_NELSON_PDF,
            evidence_category=_RETROFIT_DESIGN,
            created_utc=_JUN_2024,
        )
    ]

    # Act
    grouped = pashub._group_into_core_and_other_files(evidence)

    # Assert
    chosen = grouped.core[CoreFiles.RETROFIT_DESIGN_DOC]
    assert chosen.file_name == _OSM_LORD_NELSON_PDF


def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None:
    """An OSM-named candidate is chosen even when a non-OSM one is newer."""
    # Arrange - the non-OSM file is newer but should lose to the OSM file
    pashub = make_client()
    older_osm = make_file(
        file_name=_OSM_LORD_NELSON_PDF,
        evidence_category=_RETROFIT_DESIGN,
        created_utc=_JAN_2024,
    )
    newer_non_osm = make_file(
        file_name="Retrofit Design Doc non-osm variant.pdf",
        evidence_category=_RETROFIT_DESIGN,
        created_utc=_JUN_2024,
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([older_osm, newer_non_osm])

    # Assert
    chosen = grouped.core[CoreFiles.RETROFIT_DESIGN_DOC]
    assert chosen.file_name == _OSM_LORD_NELSON_PDF


def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> (
    None
):
    """With two OSM-named candidates, the later-created one is chosen."""
    # Arrange
    pashub = make_client()
    older_osm = make_file(
        file_name=_OSM_LORD_NELSON_PDF,
        evidence_category=_RETROFIT_DESIGN,
        created_utc=_JAN_2024,
    )
    newer_osm = make_file(
        file_name=_OSM_ALVASTON_PDF,
        evidence_category=_RETROFIT_DESIGN,
        created_utc=_JUN_2024,
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([older_osm, newer_osm])

    # Assert
    chosen = grouped.core[CoreFiles.RETROFIT_DESIGN_DOC]
    assert chosen.file_name == _OSM_ALVASTON_PDF


def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None:
    """An MCS compliance certificate is core and leaves `other` empty."""
    # Arrange
    pashub = make_client()
    evidence = [
        make_file(
            file_name="MCS_cert_job123.pdf",
            evidence_category=_MCS_CERTIFICATE,
        ),
    ]

    # Act
    grouped = pashub._group_into_core_and_other_files(evidence)

    # Assert
    assert CoreFiles.MCS_COMPLIANCE_CERTIFICATE in grouped.core
    assert grouped.other == []


def test_group_into_core_and_other_files_picks_most_recent_mcs_cert() -> None:
    """Of two MCS certificates, the later-created one is chosen."""
    # Arrange
    pashub = make_client()
    old_cert = make_file(
        file_name="mcs_cert_old.pdf",
        evidence_category=_MCS_CERTIFICATE,
        created_utc=_JAN_2024,
    )
    new_cert = make_file(
        file_name="mcs_cert_new.pdf",
        evidence_category=_MCS_CERTIFICATE,
        created_utc=_JUN_2024,
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([old_cert, new_cert])

    # Assert
    chosen = grouped.core[CoreFiles.MCS_COMPLIANCE_CERTIFICATE]
    assert chosen.file_name == "mcs_cert_new.pdf"


def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> (
    None
):
    """Without any OSM-named candidate, the later-created design is chosen."""
    # Arrange
    pashub = make_client()
    first_version = make_file(
        file_name="retrofit_design_v1.pdf",
        evidence_category=_RETROFIT_DESIGN,
        created_utc=_JAN_2024,
    )
    second_version = make_file(
        file_name="retrofit_design_v2.pdf",
        evidence_category=_RETROFIT_DESIGN,
        created_utc=_JUN_2024,
    )

    # Act
    grouped = pashub._group_into_core_and_other_files(
        [first_version, second_version]
    )

    # Assert
    chosen = grouped.core[CoreFiles.RETROFIT_DESIGN_DOC]
    assert chosen.file_name == "retrofit_design_v2.pdf"


# ---------------------------------------------------------------------------
# get_evidence_files_by_job_id
# ---------------------------------------------------------------------------


def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_when_include_other_false() -> (
    None
):
    """include_other=False yields DownloadedFiles with core only."""
    # Arrange
    pashub = make_client()
    evidence = [
        make_file(file_name="SiteNote_001.pdf"),
        make_file(file_name="unknown_doc.pdf"),
    ]

    # Act
    downloaded = _fetch_with_stubbed_io(pashub, evidence, include_other=False)

    # Assert
    assert isinstance(downloaded, DownloadedFiles)
    core_paths = [entry.file_path for entry in downloaded.core]
    assert core_paths == ["/tmp/SiteNote_001.pdf"]
    assert downloaded.other == []


def test_get_evidence_files_by_job_id_core_files_carry_evidence_category() -> None:
    """The downloaded core entry keeps the source file's evidence category."""
    # Arrange
    pashub = make_client()
    evidence = [
        make_file(
            file_name="MCS_cert.pdf",
            evidence_category=_MCS_CERTIFICATE,
        ),
    ]

    # Act
    downloaded = _fetch_with_stubbed_io(pashub, evidence, include_other=False)

    # Assert
    assert len(downloaded.core) == 1
    assert downloaded.core[0].evidence_category == _MCS_CERTIFICATE


def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> (
    None
):
    """include_other=True yields both the core and the other file paths."""
    # Arrange
    pashub = make_client()
    evidence = [
        make_file(file_name="SiteNote_001.pdf"),
        make_file(file_name="unknown_doc.pdf"),
    ]

    # Act
    downloaded = _fetch_with_stubbed_io(pashub, evidence, include_other=True)

    # Assert
    core_paths = [entry.file_path for entry in downloaded.core]
    assert core_paths == ["/tmp/SiteNote_001.pdf"]
    other_paths = [entry.file_path for entry in downloaded.other]
    assert other_paths == ["/tmp/unknown_doc.pdf"]