Group evidence into core and other via _group_into_core_and_other_files 🟪

This commit is contained in:
Daniel Roth 2026-06-01 15:07:19 +00:00 committed by Jun-te Kim
parent 99229844b5
commit e7c679e0db
2 changed files with 30 additions and 38 deletions

View file

@ -1,6 +1,6 @@
from collections import defaultdict
import os
from typing import Dict, List, Optional
from typing import Dict, List, NamedTuple, Optional
from datetime import datetime
import requests
@ -13,6 +13,11 @@ from utils.logger import setup_logger
logger = setup_logger()
class _EvidenceFileGroups(NamedTuple):
    """Evidence files split into a core group and a remainder group.

    This is the return type of ``_group_into_core_and_other_files``; being a
    ``NamedTuple`` it can be read by attribute (``.core`` / ``.other``) or
    unpacked positionally as ``(core, other)``.
    """

    # One chosen file per recognised core file type.
    # NOTE(review): the selection rule is only partly visible here (the newest
    # ``created_utc`` wins within a group) — confirm against the full method.
    core: Dict[CoreFiles, EvidenceFileData]
    # Files for which ``get_core_file_type`` yielded no core type.
    other: List[EvidenceFileData]
class UnauthorizedError(Exception):
    """Module-specific exception with no behaviour beyond ``Exception``.

    NOTE(review): presumably raised when a request is rejected as
    unauthorized — the raise site is not visible here; confirm against callers.
    """
@ -42,9 +47,9 @@ class PashubClient:
saved_files: List[str] = []
core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files(
core_files: Dict[CoreFiles, EvidenceFileData] = self._group_into_core_and_other_files(
evidence_list
)
).core
logger.info(f"Number of core files to download is {len(core_files)}")
@ -92,23 +97,19 @@ class PashubClient:
)
return None
def _select_other_files(
def _group_into_core_and_other_files(
self,
files: List[EvidenceFileData],
) -> List[EvidenceFileData]:
return [f for f in files if get_core_file_type(f.file_name, f.evidence_category) is None]
def _select_latest_core_files(
self,
files: List[EvidenceFileData],
) -> Dict[CoreFiles, EvidenceFileData]:
) -> _EvidenceFileGroups:
grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)
other: List[EvidenceFileData] = []
for file in files:
core_type: Optional[CoreFiles] = get_core_file_type(
file.file_name, file.evidence_category
)
if not core_type:
other.append(file)
continue
grouped[core_type].append(file)
@ -121,7 +122,7 @@ class PashubClient:
latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc))
latest_core_files[core_type] = latest
return latest_core_files
return _EvidenceFileGroups(core=latest_core_files, other=other)
def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]:
url = f"{self.base}/jobs/{job_id}/evidence"

View file

@ -26,36 +26,27 @@ def make_file(
# ---------------------------------------------------------------------------
# _select_other_files
# _group_into_core_and_other_files
# ---------------------------------------------------------------------------
def test_select_other_files_returns_non_core_files_only() -> None:
def test_group_into_core_and_other_files_classifies_core_and_other_correctly() -> None:
# Arrange
client = make_client()
files = [
make_file(file_name="SiteNote_001.pdf"),
make_file(file_name="some_unknown_document.pdf"),
make_file(file_name="another_random_file.docx"),
make_file(file_name="Photopack_002.pdf"),
]
# Act
result = client._select_other_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert [f.file_name for f in result] == [
"some_unknown_document.pdf",
"another_random_file.docx",
]
assert CoreFiles.SITENOTE in result.core
assert [f.file_name for f in result.other] == ["some_unknown_document.pdf"]
# ---------------------------------------------------------------------------
# _select_latest_core_files
# ---------------------------------------------------------------------------
def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> None:
# Arrange
client = make_client()
files = [
@ -67,13 +58,13 @@ def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None:
# Arrange - the non-OSM file is newer but should lose to the OSM file
client = make_client()
files = [
@ -90,13 +81,13 @@ def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None:
def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> None:
# Arrange
client = make_client()
files = [
@ -113,13 +104,13 @@ def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None:
def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> None:
# Arrange
client = make_client()
files = [
@ -136,7 +127,7 @@ def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates()
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"
assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"