From e7c679e0db72c4a1609f04ae1372f90b81350947 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 1 Jun 2026 15:07:19 +0000 Subject: [PATCH] =?UTF-8?q?Group=20evidence=20into=20core=20and=20other=20?= =?UTF-8?q?via=20`=5Fgroup=5Finto=5Fcore=5Fand=5Fother=5Ffiles`=20?= =?UTF-8?q?=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pashub_fetcher/pashub_client.py | 25 +++++------ .../tests/test_pashub_client.py | 43 ++++++++----------- 2 files changed, 30 insertions(+), 38 deletions(-) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 5fc32954..26061e3e 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -1,6 +1,6 @@ from collections import defaultdict import os -from typing import Dict, List, Optional +from typing import Dict, List, NamedTuple, Optional from datetime import datetime import requests @@ -13,6 +13,11 @@ from utils.logger import setup_logger logger = setup_logger() +class _EvidenceFileGroups(NamedTuple): + core: Dict[CoreFiles, EvidenceFileData] + other: List[EvidenceFileData] + + class UnauthorizedError(Exception): pass @@ -42,9 +47,9 @@ class PashubClient: saved_files: List[str] = [] - core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files( + core_files: Dict[CoreFiles, EvidenceFileData] = self._group_into_core_and_other_files( evidence_list - ) + ).core logger.info(f"Number of core files to download is {len(core_files)}") @@ -92,23 +97,19 @@ class PashubClient: ) return None - def _select_other_files( + def _group_into_core_and_other_files( self, files: List[EvidenceFileData], - ) -> List[EvidenceFileData]: - return [f for f in files if get_core_file_type(f.file_name, f.evidence_category) is None] - - def _select_latest_core_files( - self, - files: List[EvidenceFileData], - ) -> Dict[CoreFiles, EvidenceFileData]: + ) -> _EvidenceFileGroups: grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list) + other: List[EvidenceFileData] = [] for file in files: core_type: Optional[CoreFiles] = get_core_file_type( file.file_name, file.evidence_category ) if not core_type: + other.append(file) continue grouped[core_type].append(file) @@ -121,7 +122,7 @@ class PashubClient: latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc)) latest_core_files[core_type] = latest - return latest_core_files + return _EvidenceFileGroups(core=latest_core_files, other=other) def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]: url = f"{self.base}/jobs/{job_id}/evidence" diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index a58b245e..aa6943c2 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -26,36 +26,27 @@ def make_file( # --------------------------------------------------------------------------- -# _select_other_files +# _group_into_core_and_other_files # --------------------------------------------------------------------------- -def test_select_other_files_returns_non_core_files_only() -> None: +def test_group_into_core_and_other_files_classifies_core_and_other_correctly() -> None: # Arrange client = make_client() files = [ make_file(file_name="SiteNote_001.pdf"), make_file(file_name="some_unknown_document.pdf"), - make_file(file_name="another_random_file.docx"), - make_file(file_name="Photopack_002.pdf"), ] # Act - result = client._select_other_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert [f.file_name for f in result] == [ - "some_unknown_document.pdf", - "another_random_file.docx", - ] + assert CoreFiles.SITENOTE in result.core + assert [f.file_name for f in result.other] == ["some_unknown_document.pdf"] -# --------------------------------------------------------------------------- -# _select_latest_core_files -# --------------------------------------------------------------------------- - - -def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: +def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> None: # Arrange client = make_client() files = [ @@ -67,13 +58,13 @@ def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" -def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: +def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None: # Arrange - the non-OSM file is newer but should lose to the OSM file client = make_client() files = [ @@ -90,13 +81,13 @@ def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" -def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None: +def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> None: # Arrange client = make_client() files = [ @@ -113,13 +104,13 @@ def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() - ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" + assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" -def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None: +def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> None: # Arrange client = make_client() files = [ @@ -136,7 +127,7 @@ def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"