Wire service to get_evidence_files_by_job_id; retire get_core_evidence_files_by_job_id 🟪

This commit is contained in:
Daniel Roth 2026-06-01 15:51:53 +00:00 committed by Jun-te Kim
parent 662f6de0ab
commit 49e7b7fea6
4 changed files with 66 additions and 76 deletions

View file

@ -43,43 +43,6 @@ class PashubClient:
)
logger.info("Finished initialising CotalityClient")
def get_core_evidence_files_by_job_id(self, job_id: str) -> List[str]:
logger.info(f"Getting Core Evidence Files for job ID {job_id}")
evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id)
logger.info(f"Found {len(evidence_list)} Evidence files to get")
if not evidence_list:
return []
saved_files: List[str] = []
core_files: Dict[CoreFiles, EvidenceFileData] = self._group_into_core_and_other_files(
evidence_list
).core
logger.info(f"Number of core files to download is {len(core_files)}")
for _, evidence in core_files.items():
evidence_id = evidence.file_id
if not evidence_id:
continue
logger.info(f"Getting metadata for file {evidence.file_name}")
metadata: EvidenceMetadata = self._get_evidence_metadata(
job_id, evidence_id
)
download_url: str = self._build_download_url(metadata, evidence.file_id)
output_dir: str = "/tmp"
file_name: str = evidence.file_name
file_path: str = os.path.join(output_dir, file_name)
self._download_file(download_url, file_path)
logger.info("Successfully downloaded file")
saved_files.append(file_path)
return saved_files
def get_evidence_files_by_job_id(
self, job_id: str, include_other: bool = False
) -> DownloadedFiles:

View file

@ -11,7 +11,11 @@ from backend.app.db.models.uploaded_file import (
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.pashub_fetcher.core_files import get_file_type_string
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_client import (
DownloadedFiles,
PashubClient,
UnauthorizedError,
)
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
@ -75,14 +79,16 @@ class PashubService:
logger.info(f"No UPRN found for job {job_id}")
try:
core_files: List[str] = active_client.get_core_evidence_files_by_job_id(
job_id
downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id(
job_id, include_other=request.get_other_files
)
except UnauthorizedError:
if active_client is not self._pashub_client:
raise
active_client = self._get_coordination_client()
core_files = active_client.get_core_evidence_files_by_job_id(job_id)
downloaded = active_client.get_evidence_files_by_job_id(
job_id, include_other=request.get_other_files
)
if uprn or hubspot_deal_id:
logger.info("Uploading files to s3")
@ -92,22 +98,25 @@ class PashubService:
else FileSourceEnum.COORDINATION_HUB
)
upload_records = self._upload_to_s3_and_update_db(
core_files, uprn, hubspot_deal_id, file_source
downloaded.core, uprn, hubspot_deal_id, file_source
)
self._save_site_notes(upload_records)
# SharePoint upload disabled: pashub sharepoint_link is inconsistent
# (points to property or project unpredictably)
# if request.sharepoint_link:
# self._upload_to_sharepoint(request.sharepoint_link, job_files)
# self._upload_to_sharepoint(request.sharepoint_link, downloaded.core)
for file_path in core_files:
if request.get_other_files:
pass # TODO: process downloaded.other
for file_path in downloaded.core:
try:
os.remove(file_path)
except OSError:
logger.warning(f"Failed to delete temp file {file_path}")
return core_files
return downloaded.core
def _upload_to_s3_and_update_db(
self,

View file

@ -14,7 +14,7 @@ class PashubToAraTriggerRequest(BaseModel):
hubspot_listing_id: Optional[int] = None
hubspot_deal_id: Optional[str] = None
get_other_files: Optional[bool] = False
get_other_files: bool = False
@property
def pashub_job_id(self) -> str:

View file

@ -4,7 +4,11 @@ from unittest.mock import MagicMock, call, patch
from backend.app.db.models.uploaded_file import FileSourceEnum
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_client import (
DownloadedFiles,
PashubClient,
UnauthorizedError,
)
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
@ -20,12 +24,14 @@ def make_request(
uprn: Optional[str] = None,
hubspot_deal_id: Optional[str] = None,
sharepoint_link: Optional[str] = None,
get_other_files: bool = False,
) -> PashubToAraTriggerRequest:
return PashubToAraTriggerRequest(
pashub_link=pashub_link,
uprn=uprn,
hubspot_deal_id=hubspot_deal_id,
sharepoint_link=sharepoint_link,
get_other_files=get_other_files,
)
@ -43,6 +49,10 @@ def make_service(
)
def make_downloaded(core: list[str], other: list[str] = []) -> DownloadedFiles:
return DownloadedFiles(core=core, other=other)
# ---------------------------------------------------------------------------
# run(): returns file paths
# ---------------------------------------------------------------------------
@ -51,10 +61,9 @@ def make_service(
def test_run_returns_file_paths() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/a.pdf",
"/tmp/b.pdf",
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf", "/tmp/b.pdf"]
)
service = make_service(pashub_client=mock_client)
@ -72,7 +81,9 @@ def test_run_returns_file_paths() -> None:
def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
service = make_service(pashub_client=mock_client)
@ -93,10 +104,9 @@ def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None:
def test_run_uploads_files_to_s3_using_uprn_path() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/SiteNote_001.pdf",
"/tmp/Photopack_002.pdf",
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf", "/tmp/Photopack_002.pdf"]
)
service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")
@ -132,9 +142,9 @@ def test_run_uploads_files_to_s3_using_uprn_path() -> None:
def test_run_persists_uploaded_file_records_to_db() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf"]
)
fake_session = MagicMock()
service = make_service(pashub_client=mock_client)
@ -163,9 +173,9 @@ def test_run_persists_uploaded_file_records_to_db() -> None:
def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf"]
)
service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")
@ -191,9 +201,9 @@ def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None:
def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/RdSAP_SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/RdSAP_SiteNote_001.pdf"]
)
fake_epc_data = MagicMock()
fake_session = MagicMock()
@ -241,7 +251,9 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
coord_client = MagicMock(spec=PashubClient)
coord_client.get_uprn_by_job_id.return_value = "99999"
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
@ -256,16 +268,18 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
assert result == ["/tmp/a.pdf"]
coord_client.get_uprn_by_job_id.assert_called_once()
coord_client.get_core_evidence_files_by_job_id.assert_called_once()
coord_client.get_evidence_files_by_job_id.assert_called_once()
assert factory.call_count == 1
def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
pas_client = MagicMock(spec=PashubClient)
pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
@ -279,7 +293,7 @@ def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
result = service.run(make_request(uprn="12345"))
assert result == ["/tmp/a.pdf"]
coord_client.get_core_evidence_files_by_job_id.assert_called_once()
coord_client.get_evidence_files_by_job_id.assert_called_once()
pas_client.get_uprn_by_job_id.assert_not_called()
@ -314,7 +328,9 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_uprn_by_job_id.return_value = "99999"
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
fake_session = MagicMock()
@ -336,10 +352,12 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup()
def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None:
pas_client = MagicMock(spec=PashubClient)
pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
fake_session = MagicMock()
@ -362,9 +380,9 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing(
def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/RdSAP_SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/RdSAP_SiteNote_001.pdf"]
)
service = make_service(pashub_client=mock_client)