# Model/backend/pashub_fetcher/tests/test_pashub_client.py
#
# 280 lines
# 8 KiB
# Python

# pyright: reportPrivateUsage=false
from typing import Optional
from unittest.mock import patch
from backend.pashub_fetcher.core_files import CoreFiles
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
from backend.pashub_fetcher.pashub_client import (
DownloadedFile,
DownloadedFiles,
PashubClient,
)
def make_metadata() -> EvidenceMetadata:
    """Build an EvidenceMetadata fixture with a fixed container name and blob URI."""
    container = "my-container"
    uri = "https://storage.example.com/blob?sas=token"
    return EvidenceMetadata(container_name=container, blob_uri=uri)
def make_client() -> PashubClient:
    """Return a PashubClient constructed with a placeholder token."""
    client = PashubClient(token="test-token")
    return client
def make_file(
    file_name: str = "unknown.pdf",
    evidence_category: Optional[str] = None,
    created_utc: str = "2024-01-01T00:00:00",
) -> EvidenceFileData:
    """Build an EvidenceFileData fixture.

    Only the name, category and creation timestamp are configurable; the
    id ("id-1"), size (1024) and extension ("pdf") are fixed for every file.
    """
    # Values shared by every fixture file produced by this helper.
    fixed_id = "id-1"
    fixed_size = 1024
    fixed_extension = "pdf"

    evidence = EvidenceFileData(
        file_id=fixed_id,
        file_name=file_name,
        created_utc=created_utc,
        file_size=fixed_size,
        file_extension=fixed_extension,
        evidence_category=evidence_category,
    )
    return evidence
# ---------------------------------------------------------------------------
# _group_into_core_and_other_files
# ---------------------------------------------------------------------------
def test_group_into_core_and_other_files_classifies_core_and_other_correctly() -> None:
    """Check that a site-note file is grouped as core and an unrecognised one as other."""
    # Arrange
    pashub = make_client()
    site_note = make_file(file_name="SiteNote_001.pdf")
    unrecognised = make_file(file_name="some_unknown_document.pdf")

    # Act
    grouped = pashub._group_into_core_and_other_files([site_note, unrecognised])

    # Assert
    assert CoreFiles.SITENOTE in grouped.core
    other_names = [entry.file_name for entry in grouped.other]
    assert other_names == ["some_unknown_document.pdf"]
def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> None:
    """Check that a lone retrofit-design file is selected as the retrofit design doc."""
    # Arrange
    expected_name = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
    pashub = make_client()
    only_candidate = make_file(
        file_name=expected_name,
        evidence_category="retrofit design",
        created_utc="2024-06-01T00:00:00",
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([only_candidate])

    # Assert
    selected = grouped.core[CoreFiles.RETROFIT_DESIGN_DOC]
    assert selected.file_name == expected_name
def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None:
    """Check that an OSM-named retrofit design beats a newer non-OSM one."""
    # Arrange - the non-OSM file is newer but should lose to the OSM file
    osm_name = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
    pashub = make_client()
    older_osm = make_file(
        file_name=osm_name,
        evidence_category="retrofit design",
        created_utc="2024-01-01T00:00:00",
    )
    newer_non_osm = make_file(
        file_name="Retrofit Design Doc non-osm variant.pdf",
        evidence_category="retrofit design",
        created_utc="2024-06-01T00:00:00",
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([older_osm, newer_non_osm])

    # Assert
    selected = grouped.core[CoreFiles.RETROFIT_DESIGN_DOC]
    assert selected.file_name == osm_name
def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> (
    None
):
    """Check that, of two OSM-named retrofit designs, the later-created one is chosen."""
    # Arrange
    newer_name = "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
    pashub = make_client()
    older_osm = make_file(
        file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
        evidence_category="retrofit design",
        created_utc="2024-01-01T00:00:00",
    )
    newer_osm = make_file(
        file_name=newer_name,
        evidence_category="retrofit design",
        created_utc="2024-06-01T00:00:00",
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([older_osm, newer_osm])

    # Assert
    selected = grouped.core[CoreFiles.RETROFIT_DESIGN_DOC]
    assert selected.file_name == newer_name
def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None:
    """Check that an MCS compliance certificate is grouped as core, leaving other empty."""
    # Arrange
    pashub = make_client()
    certificate = make_file(
        file_name="MCS_cert_job123.pdf",
        evidence_category="MCS Compliance Certificate",
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([certificate])

    # Assert
    assert CoreFiles.MCS_COMPLIANCE_CERTIFICATE in grouped.core
    assert grouped.other == []
def test_group_into_core_and_other_files_picks_most_recent_mcs_cert() -> None:
    """Check that the later-created of two MCS certificates is the one kept in core."""
    # Arrange
    mcs_category = "MCS Compliance Certificate"
    pashub = make_client()
    old_cert = make_file(
        file_name="mcs_cert_old.pdf",
        evidence_category=mcs_category,
        created_utc="2024-01-01T00:00:00",
    )
    new_cert = make_file(
        file_name="mcs_cert_new.pdf",
        evidence_category=mcs_category,
        created_utc="2024-06-01T00:00:00",
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([old_cert, new_cert])

    # Assert
    selected = grouped.core[CoreFiles.MCS_COMPLIANCE_CERTIFICATE]
    assert selected.file_name == "mcs_cert_new.pdf"
def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> (
    None
):
    """Check that, with no OSM-named candidates, the later-created retrofit design wins."""
    # Arrange
    retrofit_category = "retrofit design"
    pashub = make_client()
    first_version = make_file(
        file_name="retrofit_design_v1.pdf",
        evidence_category=retrofit_category,
        created_utc="2024-01-01T00:00:00",
    )
    second_version = make_file(
        file_name="retrofit_design_v2.pdf",
        evidence_category=retrofit_category,
        created_utc="2024-06-01T00:00:00",
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([first_version, second_version])

    # Assert
    selected = grouped.core[CoreFiles.RETROFIT_DESIGN_DOC]
    assert selected.file_name == "retrofit_design_v2.pdf"
# ---------------------------------------------------------------------------
# get_evidence_files_by_job_id
# ---------------------------------------------------------------------------
def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_when_include_other_false() -> (
    None
):
    """Check the result shape when other files are excluded.

    With the listing, metadata and download steps patched out, the call
    should return a DownloadedFiles whose core holds only the site-note
    path and whose other list is empty.
    """
    # Arrange
    pashub = make_client()
    listing = [
        make_file(file_name="SiteNote_001.pdf"),
        make_file(file_name="unknown_doc.pdf"),
    ]

    # Act - network-facing helpers are stubbed so nothing is fetched
    with patch.object(pashub, "_get_evidence_list", return_value=listing):
        with patch.object(
            pashub, "_get_evidence_metadata", return_value=make_metadata()
        ):
            with patch.object(pashub, "_download_file"):
                downloaded = pashub.get_evidence_files_by_job_id(
                    "job-1", include_other=False
                )

    # Assert
    assert isinstance(downloaded, DownloadedFiles)
    core_paths = [item.file_path for item in downloaded.core]
    assert core_paths == ["/tmp/SiteNote_001.pdf"]
    assert downloaded.other == []
def test_get_evidence_files_by_job_id_core_files_carry_evidence_category() -> None:
    """Check that a downloaded core file keeps the evidence category of its source file."""
    # Arrange
    mcs_category = "MCS Compliance Certificate"
    pashub = make_client()
    listing = [
        make_file(file_name="MCS_cert.pdf", evidence_category=mcs_category),
    ]

    # Act - network-facing helpers are stubbed so nothing is fetched
    with patch.object(pashub, "_get_evidence_list", return_value=listing):
        with patch.object(
            pashub, "_get_evidence_metadata", return_value=make_metadata()
        ):
            with patch.object(pashub, "_download_file"):
                downloaded = pashub.get_evidence_files_by_job_id(
                    "job-1", include_other=False
                )

    # Assert
    assert len(downloaded.core) == 1
    only_core = downloaded.core[0]
    assert only_core.evidence_category == mcs_category
def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> (
    None
):
    """Check that include_other=True yields both the core and the other file paths."""
    # Arrange
    pashub = make_client()
    listing = [
        make_file(file_name="SiteNote_001.pdf"),
        make_file(file_name="unknown_doc.pdf"),
    ]

    # Act - network-facing helpers are stubbed so nothing is fetched
    with patch.object(pashub, "_get_evidence_list", return_value=listing):
        with patch.object(
            pashub, "_get_evidence_metadata", return_value=make_metadata()
        ):
            with patch.object(pashub, "_download_file"):
                downloaded = pashub.get_evidence_files_by_job_id(
                    "job-1", include_other=True
                )

    # Assert
    core_paths = [item.file_path for item in downloaded.core]
    assert core_paths == ["/tmp/SiteNote_001.pdf"]
    other_paths = [item.file_path for item in downloaded.other]
    assert other_paths == ["/tmp/unknown_doc.pdf"]