mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Add new methods for downloading all core files for the pashub URL. The download is currently not being authorised.
This commit is contained in:
parent
6617d9e614
commit
609468cff9
4 changed files with 53 additions and 34 deletions
|
|
@ -1,13 +1,23 @@
|
|||
from typing import List, Optional
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import requests
|
||||
|
||||
from backend.pashub_fetcher.core_files import CoreFiles
|
||||
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
|
||||
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
|
||||
from utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class CotalityClient:
|
||||
def __init__(self, token: str):
|
||||
|
||||
logger.info(f"Container UTC time: {datetime.now(timezone.utc)}")
|
||||
|
||||
self.token = token
|
||||
self.company_id = "cb5249e2-8f31-4ef4-aefd-08ddaccb1fa2"
|
||||
self.base = "https://pashub.net/api"
|
||||
|
|
@ -19,61 +29,68 @@ class CotalityClient:
|
|||
"Accept": "application/json",
|
||||
}
|
||||
)
|
||||
logger.info("Finished initialising CotalityClient")
|
||||
|
||||
def get_core_envidence_files_by_job_id(self, job_id: str) -> List[str]:
    """
    Download the core evidence files for a specific job.

    Placeholder: the method is not implemented yet and always raises.

    Args:
        job_id: Identifier of the job whose evidence files are wanted.

    Raises:
        NotImplementedError: Always.
    """
    # Candidate endpoint, kept from the original stub:
    #   url = f"{self.base}/jobs/{job_id}/evidence"
    raise NotImplementedError
|
||||
|
||||
def get_evidence_files_by_uprn(self, uprn: str) -> List[str]:
    """
    Download the core evidence files for the most recent job for a UPRN.

    The evidence list of the latest job is reduced to one file per core
    type by ``_select_latest_core_files``; each selected file is then
    downloaded exactly once.

    Args:
        uprn: Property reference used to look up the latest job.

    Returns:
        The file names of the downloaded files, in download order. Empty
        when no job is found for the UPRN or the job has no evidence.
    """
    job_id: Optional[str] = self._get_latest_job_id(uprn)
    if not job_id:
        return []

    logger.info(f"Getting Core Evidence Files for job ID {job_id}")
    evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id)
    logger.info(f"Found {len(evidence_list)} Evidence files to get")
    if not evidence_list:
        return []

    # Select the core files once, up front. The selection depends only on
    # the full evidence list, so it must not sit inside a per-evidence loop
    # (that would repeat every download once per evidence entry).
    core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files(
        evidence_list
    )
    logger.info(f"Number of core files to download is {len(core_files)}")

    saved_files: List[str] = []

    # Only the selected files matter here; the core-type keys are unused.
    for evidence in core_files.values():
        evidence_id = evidence.file_id
        if not evidence_id:
            # Without a file id there is nothing to request metadata for.
            continue

        logger.info(f"Getting metadata for file {evidence.file_name}")
        metadata: EvidenceMetadata = self._get_evidence_metadata(
            job_id, evidence_id
        )

        download_url: str = self._build_download_url(metadata, evidence_id)
        logger.info(f"Download URL: {download_url}")
        file_name = evidence.file_name

        self._download_file(download_url, file_name)
        logger.info("Successfully downloaded file")
        saved_files.append(file_name)

    return saved_files
|
||||
|
||||
def _get_latest_job_id(self, uprn: str) -> Optional[str]:
    """
    Look up the id of the most recently created job for a UPRN.

    Args:
        uprn: Property reference to filter jobs by (sent as ``addressUprn``).

    Returns:
        The ``id`` of the first job in the response, or ``None`` when the
        response contains no results.

    Raises:
        Whatever ``raise_for_status()`` raises for an unsuccessful response
        (presumably ``requests.HTTPError`` - confirm ``self.session`` is a
        ``requests.Session``).
    """
    url = f"{self.base}/jobs"
    # Newest-first ordering is requested so that the first result is the
    # latest job; this relies on the API honouring orderBy/orderDesc.
    params = {
        "pageIndex": 0,
        "pageSize": 20,
        "orderBy": "createdUtc",
        "orderDesc": "true",
        "addressUprn": uprn,
        "companyId": self.company_id,
    }

    r = self.session.get(url, params=params)
    r.raise_for_status()

    jobs = r.json().get("results", [])
    return jobs[0]["id"] if jobs else None

def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]:
    """
    Classify an evidence file as one of the core file types.

    Args:
        file: Evidence file whose ``file_name`` is inspected.

    Returns:
        The first ``CoreFiles`` member whose value is a prefix of the file
        name, or ``None`` when no member matches.
    """
    for core_file in CoreFiles:
        if file.file_name.startswith(core_file.value):
            return core_file
    return None

def _select_latest_core_files(
    self,
    files: List[EvidenceFileData],
) -> Dict[CoreFiles, EvidenceFileData]:
    """
    Pick the newest evidence file for each core file type.

    Args:
        files: All evidence files of a job; files that do not match any
            core type are ignored.

    Returns:
        A mapping from core file type to the file of that type with the
        greatest ``created_utc``. Types with no matching file are absent.
    """
    grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)

    for file in files:
        core_type = self._get_core_file_type(file)
        if not core_type:
            continue
        grouped[core_type].append(file)

    # NOTE(review): datetime.fromisoformat only accepts a trailing "Z" from
    # Python 3.11 onwards - confirm the runtime version or the format of
    # created_utc returned by the API.
    def created_at(f: EvidenceFileData) -> datetime:
        return datetime.fromisoformat(f.created_utc)

    latest_files: Dict[CoreFiles, EvidenceFileData] = {
        core_type: max(group, key=created_at)
        for core_type, group in grouped.items()
    }

    return latest_files
|
||||
|
||||
def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]:
|
||||
url = f"{self.base}/jobs/{job_id}/evidence"
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest
|
|||
RUN chmod +x /usr/local/bin/aws-lambda-rie
|
||||
|
||||
# Install Lambda runtime client
|
||||
RUN pip install awslambdaric playwright==1.58.0
|
||||
RUN pip install awslambdaric playwright==1.58.0 requests
|
||||
|
||||
# Set working directory (Lambda task root)
|
||||
WORKDIR /var/task
|
||||
|
|
|
|||
|
|
@ -20,7 +20,9 @@ def handler(event: Mapping[str, Any], context: Any) -> None:
|
|||
raise
|
||||
|
||||
client = CotalityClient(token=token)
|
||||
uprn = "100061885568" # TODO: get from request body
|
||||
# uprn = "100061885568" # TODO: get from request body
|
||||
job_id = "5abf6e27-e4c4-4ba8-b69d-9e34939e0002" # TODO: get from request body
|
||||
|
||||
saved_files: List[str] = client.get_evidence_files_by_uprn(uprn=uprn)
|
||||
saved_files: List[str] = client.get_core_envidence_files_by_job_id(job_id)
|
||||
# saved_files: List[str] = client.get_evidence_files_by_uprn(uprn=uprn)
|
||||
print(f"saved {len(saved_files)} files")
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import json
import os

import requests
|
||||
|
||||
# Bearer token for the pashub API, read from the PASHUB_TOKEN environment
# variable so that no credential is stored in version control. The JWTs that
# were previously hard-coded here should be treated as leaked and revoked.
# When the variable is unset TOKEN is an empty string and API requests will
# be rejected as unauthorised.
TOKEN = os.environ.get("PASHUB_TOKEN", "")
|
||||
|
||||
base = "https://pashub.net/api"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue