mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Downloaded files carry evidence_category as DownloadedFile 🟩
This commit is contained in:
parent
d68cbb44f8
commit
bdc35573b5
4 changed files with 83 additions and 21 deletions
|
|
@ -25,8 +25,8 @@ class _EvidenceFileGroups(NamedTuple):
|
|||
|
||||
|
||||
class DownloadedFiles(NamedTuple):
    """Downloaded evidence files, split into the core and other groups.

    Entries are DownloadedFile records rather than bare path strings, so the
    evidence category and creation time travel with each path.
    """

    # Downloads for the evidence entries grouped as core.
    core: List[DownloadedFile]
    # Downloads for the remaining (non-core) evidence entries.
    other: List[DownloadedFile]
|
||||
|
||||
|
||||
class UnauthorizedError(Exception):
|
||||
|
|
@ -62,7 +62,7 @@ class PashubClient:
|
|||
evidence_list
|
||||
)
|
||||
|
||||
core_paths: List[str] = []
|
||||
core_files: List[DownloadedFile] = []
|
||||
for _, evidence in grouped.core.items():
|
||||
if not evidence.file_id:
|
||||
continue
|
||||
|
|
@ -73,9 +73,15 @@ class PashubClient:
|
|||
file_path: str = os.path.join("/tmp", evidence.file_name)
|
||||
self._download_file(download_url, file_path)
|
||||
logger.info("Successfully downloaded file")
|
||||
core_paths.append(file_path)
|
||||
core_files.append(
|
||||
DownloadedFile(
|
||||
file_path=file_path,
|
||||
evidence_category=evidence.evidence_category,
|
||||
created_utc=datetime.fromisoformat(evidence.created_utc),
|
||||
)
|
||||
)
|
||||
|
||||
other_paths: List[str] = []
|
||||
other_files: List[DownloadedFile] = []
|
||||
|
||||
if include_other:
|
||||
for evidence in grouped.other:
|
||||
|
|
@ -88,9 +94,15 @@ class PashubClient:
|
|||
|
||||
self._download_file(download_url, file_path)
|
||||
logger.info("Successfully downloaded other file")
|
||||
other_paths.append(file_path)
|
||||
other_files.append(
|
||||
DownloadedFile(
|
||||
file_path=file_path,
|
||||
evidence_category=evidence.evidence_category,
|
||||
created_utc=datetime.fromisoformat(evidence.created_utc),
|
||||
)
|
||||
)
|
||||
|
||||
return DownloadedFiles(core=core_paths, other=other_paths)
|
||||
return DownloadedFiles(core=core_files, other=other_files)
|
||||
|
||||
def get_uprn_by_job_id(self, job_id: str) -> Optional[str]:
|
||||
logger.info(f"Getting UPRN for job ID {job_id}")
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ from backend.documents_parser.db_writer import save_epc_property_data
|
|||
from backend.documents_parser.parser import parse_site_notes_pdf
|
||||
from backend.pashub_fetcher.core_files import get_file_type_string
|
||||
from backend.pashub_fetcher.pashub_client import (
|
||||
DownloadedFile,
|
||||
DownloadedFiles,
|
||||
PashubClient,
|
||||
UnauthorizedError,
|
||||
|
|
@ -116,17 +117,17 @@ class PashubService:
|
|||
# if request.sharepoint_link:
|
||||
# self._upload_to_sharepoint(request.sharepoint_link, downloaded.core)
|
||||
|
||||
for file_path in downloaded.core + downloaded.other:
|
||||
for df in downloaded.core + downloaded.other:
|
||||
try:
|
||||
os.remove(file_path)
|
||||
os.remove(df.file_path)
|
||||
except OSError:
|
||||
logger.warning(f"Failed to delete temp file {file_path}")
|
||||
logger.warning(f"Failed to delete temp file {df.file_path}")
|
||||
|
||||
return downloaded.core + downloaded.other
|
||||
return [df.file_path for df in downloaded.core + downloaded.other]
|
||||
|
||||
def _upload_to_s3_and_update_db(
|
||||
self,
|
||||
job_files: List[str],
|
||||
job_files: List[DownloadedFile],
|
||||
uprn: Optional[str],
|
||||
hubspot_deal_id: Optional[str],
|
||||
file_source: FileSourceEnum,
|
||||
|
|
@ -144,11 +145,11 @@ class PashubService:
|
|||
file_paths: List[str] = []
|
||||
uploaded_files: List[UploadedFile] = []
|
||||
|
||||
for file_path in job_files:
|
||||
filename = os.path.basename(file_path)
|
||||
for df in job_files:
|
||||
filename = os.path.basename(df.file_path)
|
||||
file_key = f"{base_path}/{filename}"
|
||||
|
||||
upload_file_to_s3(file_path, self._s3_bucket, file_key)
|
||||
upload_file_to_s3(df.file_path, self._s3_bucket, file_key)
|
||||
|
||||
uploaded_file = UploadedFile(
|
||||
s3_file_bucket=self._s3_bucket,
|
||||
|
|
@ -157,9 +158,9 @@ class PashubService:
|
|||
uprn=int(uprn) if uprn else None,
|
||||
hubspot_deal_id=hubspot_deal_id,
|
||||
file_source=file_source.value,
|
||||
file_type=get_file_type_string(filename) or default_file_type,
|
||||
file_type=get_file_type_string(filename, df.evidence_category) or default_file_type,
|
||||
)
|
||||
file_paths.append(file_path)
|
||||
file_paths.append(df.file_path)
|
||||
uploaded_files.append(uploaded_file)
|
||||
|
||||
with db_session() as session:
|
||||
|
|
|
|||
|
|
@ -119,6 +119,47 @@ def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_
|
|||
assert result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
|
||||
|
||||
|
||||
def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None:
    """A lone file in the MCS certificate category is grouped as core, not other."""
    # Arrange: one evidence entry carrying the MCS certificate category.
    pashub = make_client()
    mcs_cert = make_file(
        file_name="MCS_cert_job123.pdf",
        evidence_category="MCS Compliance Certificate",
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([mcs_cert])

    # Assert: present under the MCS core key, with nothing left in other.
    assert CoreFiles.MCS_CERTIFICATE in grouped.core
    assert grouped.other == []
|
||||
|
||||
|
||||
def test_group_into_core_and_other_files_picks_most_recent_mcs_cert() -> None:
    """With two MCS certificates, the one with the later created_utc is kept."""
    # Arrange: two certificates in the same category, created months apart.
    pashub = make_client()
    older = make_file(
        file_name="mcs_cert_old.pdf",
        evidence_category="MCS Compliance Certificate",
        created_utc="2024-01-01T00:00:00",
    )
    newer = make_file(
        file_name="mcs_cert_new.pdf",
        evidence_category="MCS Compliance Certificate",
        created_utc="2024-06-01T00:00:00",
    )

    # Act
    grouped = pashub._group_into_core_and_other_files([older, newer])

    # Assert
    selected = grouped.core[CoreFiles.MCS_CERTIFICATE]
    assert selected.file_name == "mcs_cert_new.pdf"
|
||||
|
||||
|
||||
def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> None:
|
||||
# Arrange
|
||||
client = make_client()
|
||||
|
|
@ -165,7 +206,7 @@ def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_
|
|||
|
||||
# Assert
|
||||
assert isinstance(result, DownloadedFiles)
|
||||
assert result.core == ["/tmp/SiteNote_001.pdf"]
|
||||
assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"]
|
||||
assert result.other == []
|
||||
|
||||
|
||||
|
|
@ -209,5 +250,5 @@ def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_t
|
|||
result = client.get_evidence_files_by_job_id("job-1", include_other=True)
|
||||
|
||||
# Assert
|
||||
assert result.core == ["/tmp/SiteNote_001.pdf"]
|
||||
assert result.other == ["/tmp/unknown_doc.pdf"]
|
||||
assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"]
|
||||
assert [df.file_path for df in result.other] == ["/tmp/unknown_doc.pdf"]
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
import pytest
|
||||
from datetime import datetime
|
||||
from typing import Any, Callable, Optional
|
||||
from unittest.mock import MagicMock, call, patch
|
||||
|
||||
|
||||
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
|
||||
from backend.pashub_fetcher.pashub_client import (
|
||||
DownloadedFile,
|
||||
DownloadedFiles,
|
||||
PashubClient,
|
||||
UnauthorizedError,
|
||||
|
|
@ -49,8 +51,14 @@ def make_service(
|
|||
)
|
||||
|
||||
|
||||
_DEFAULT_UTC = datetime(2024, 1, 1)
|
||||
|
||||
|
||||
def make_downloaded(core: list[str], other: Optional[list[str]] = None) -> DownloadedFiles:
    """Build a DownloadedFiles fixture from plain file paths.

    Each path is wrapped in a DownloadedFile with no evidence category and
    the fixed ``_DEFAULT_UTC`` timestamp.

    Args:
        core: Paths to wrap as core downloads.
        other: Paths to wrap as other downloads; omitted or None means none.

    Returns:
        DownloadedFiles whose entries follow the order of the given paths.
    """
    # A None sentinel avoids sharing one mutable ``[]`` default across calls.
    other_paths = [] if other is None else other
    return DownloadedFiles(
        core=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in core],
        other=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in other_paths],
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue