From 8a17ea7265aad5d542782688872525cf30058542 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Mar 2026 15:46:31 +0000 Subject: [PATCH 01/17] set up docker and handler --- backend/pashub_fetcher/handler/Dockerfile | 16 ++++++++++++++++ backend/pashub_fetcher/handler/handler.py | 10 ++++++++++ backend/pashub_fetcher/processor.py | 0 3 files changed, 26 insertions(+) create mode 100644 backend/pashub_fetcher/handler/Dockerfile create mode 100644 backend/pashub_fetcher/handler/handler.py create mode 100644 backend/pashub_fetcher/processor.py diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile new file mode 100644 index 00000000..a67304ad --- /dev/null +++ b/backend/pashub_fetcher/handler/Dockerfile @@ -0,0 +1,16 @@ +FROM mcr.microsoft.com/playwright/python:v1.42.0 + +# Set working directory (Lambda task root) +WORKDIR /var/task + +COPY backend/.env.test backend/.env + +COPY utils/ utils/ +COPY backend/pashub_fetcher/ backend/pashub_fetcher/ + +# ----------------------------- +# Lambda handler +# ----------------------------- +# CMD ["backend/pashub_fetcher/handler/handler.handler"] +# For local running +CMD ["python", "-m", "backend.pashub_fetcher.handler.handler"] \ No newline at end of file diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py new file mode 100644 index 00000000..8fd7e175 --- /dev/null +++ b/backend/pashub_fetcher/handler/handler.py @@ -0,0 +1,10 @@ +from typing import Any, Mapping + +from utils.logger import setup_logger + + +logger = setup_logger() + + +def handler(event: Mapping[str, Any], context: Any) -> None: + logger.info("Recevied message") diff --git a/backend/pashub_fetcher/processor.py b/backend/pashub_fetcher/processor.py new file mode 100644 index 00000000..e69de29b From 142024550e1d0fcea3632b48362702f6918ba9a4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Mar 2026 16:22:53 +0000 Subject: [PATCH 02/17] set up local lambda runner and invoker --- backend/pashub_fetcher/handler/Dockerfile | 13 +++++++++- backend/pashub_fetcher/handler/handler.py | 2 +- .../local_handler/docker-compose.yml} | 6 ++--- .../local_handler/invoke_local_lambda.py | 26 +++++++++++++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) rename backend/{docker-compose-local-lambdas.yml => pashub_fetcher/local_handler/docker-compose.yml} (50%) create mode 100644 backend/pashub_fetcher/local_handler/invoke_local_lambda.py diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile index a67304ad..1534afdb 100644 --- a/backend/pashub_fetcher/handler/Dockerfile +++ b/backend/pashub_fetcher/handler/Dockerfile @@ -1,5 +1,12 @@ FROM mcr.microsoft.com/playwright/python:v1.42.0 +# Install AWS Lambda RIE +ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/local/bin/aws-lambda-rie +RUN chmod +x /usr/local/bin/aws-lambda-rie + +# Install Lambda runtime client +RUN pip install awslambdaric + # Set working directory (Lambda task root) WORKDIR /var/task @@ -8,9 +15,13 @@ COPY backend/.env.test backend/.env COPY utils/ utils/ COPY backend/pashub_fetcher/ backend/pashub_fetcher/ + +# Lambda entrypoint +ENTRYPOINT ["/usr/local/bin/aws-lambda-rie", "python", "-m", "awslambdaric"] + # ----------------------------- # Lambda handler # ----------------------------- # CMD ["backend/pashub_fetcher/handler/handler.handler"] # For local running -CMD ["python", "-m", "backend.pashub_fetcher.handler.handler"] \ No newline at end of file +CMD ["backend.pashub_fetcher.handler.handler.handler"] \ No newline at end of file diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 8fd7e175..00fbd6b5 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -7,4 +7,4 @@ logger = setup_logger() def handler(event: Mapping[str, Any], context: Any) -> None: - logger.info("Recevied message") + logger.info("Received message") diff --git a/backend/docker-compose-local-lambdas.yml b/backend/pashub_fetcher/local_handler/docker-compose.yml similarity index 50% rename from backend/docker-compose-local-lambdas.yml rename to backend/pashub_fetcher/local_handler/docker-compose.yml index 50e9193b..0ee53283 100644 --- a/backend/docker-compose-local-lambdas.yml +++ b/backend/pashub_fetcher/local_handler/docker-compose.yml @@ -3,9 +3,9 @@ version: "3.9" services: categorisation-lambda: build: - context: ../ - dockerfile: backend/categorisation/handler/Dockerfile + context: ../../../ + dockerfile: backend/pashub_fetcher/handler/Dockerfile ports: - "9000:8080" env_file: - - ../.env \ No newline at end of file + - ../../../.env \ No newline at end of file diff --git a/backend/pashub_fetcher/local_handler/invoke_local_lambda.py b/backend/pashub_fetcher/local_handler/invoke_local_lambda.py new file mode 100644 index 00000000..463ef9d8 --- /dev/null +++ b/backend/pashub_fetcher/local_handler/invoke_local_lambda.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +import json +import requests + +HOST = "localhost" +PORT = "9000" + +LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations" + +payload = { + "Records": [ + { + "body": json.dumps( + { + "uprn": 123456, + } + ) + } + ] +} + +response = requests.post(LAMBDA_URL, json=payload) + +print("Status code:", response.status_code) +print("Response:") +print(response.text) From a146c7d82e25bee4a75e8fc770596bedc159ae09 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Mar 2026 16:45:53 +0000 Subject: [PATCH 03/17] go to pas hub with playwright --- .devcontainer/backend/requirements.txt | 1 + backend/pashub_fetcher/handler/Dockerfile | 4 ++-- backend/pashub_fetcher/handler/handler.py | 12 +++++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt index 5cd40ced..d7afa6a2 100644 --- a/.devcontainer/backend/requirements.txt +++ b/.devcontainer/backend/requirements.txt @@ -6,6 +6,7 @@ psycopg2-binary==2.9.10 python-jose==3.3.0 cryptography==43.0.3 mangum==0.19.0 +playwright==1.58.0 # AWS boto3==1.35.44 # Data diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile index 1534afdb..2128d12c 100644 --- a/backend/pashub_fetcher/handler/Dockerfile +++ b/backend/pashub_fetcher/handler/Dockerfile @@ -1,11 +1,11 @@ -FROM mcr.microsoft.com/playwright/python:v1.42.0 +FROM mcr.microsoft.com/playwright/python:v1.58.0-jammy # Install AWS Lambda RIE ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/local/bin/aws-lambda-rie RUN chmod +x /usr/local/bin/aws-lambda-rie # Install Lambda runtime client -RUN pip install awslambdaric +RUN pip install awslambdaric playwright==1.58.0 # Set working directory (Lambda task root) WORKDIR /var/task diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 00fbd6b5..2fe2af1d 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,5 +1,7 @@ from typing import Any, Mapping +from playwright.sync_api import sync_playwright + from utils.logger import setup_logger @@ -7,4 +9,12 @@ logger = setup_logger() def handler(event: Mapping[str, Any], context: Any) -> None: - logger.info("Received message") + logger.info("Starting Playwright flow") + + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + + page.goto("https://pashub.net/") + + logger.info(f"Page title: {page.title()}") From b156617a2b1248bff533ad5acd0ae11beacba5c8 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Mar 2026 17:04:49 +0000 Subject: [PATCH 04/17] try logging into pas hub --- backend/pashub_fetcher/handler/handler.py | 46 ++++++++++++++++--- .../local_handler/docker-compose.yml | 2 +- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 2fe2af1d..3d22ef44 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,6 +1,6 @@ -from typing import Any, Mapping +from typing import Any, Dict, Mapping -from playwright.sync_api import sync_playwright +from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError from utils.logger import setup_logger @@ -8,13 +8,47 @@ from utils.logger import setup_logger logger = setup_logger() -def handler(event: Mapping[str, Any], context: Any) -> None: +def handler(event: Mapping[str, Any], context: Any) -> Dict[str, str]: logger.info("Starting Playwright flow") + email = "random@test.com" + password = "my_fake_password" + with sync_playwright() as p: - browser = p.chromium.launch(headless=True) + browser = p.chromium.launch( + headless=True, + args=["--no-sandbox", "--disable-dev-shm-usage"], + ) page = browser.new_page() - page.goto("https://pashub.net/") + try: + logger.info("Navigating to site...") + page.goto("https://pashub.net/", timeout=30000) - logger.info(f"Page title: {page.title()}") + logger.info("Filling login form...") + page.fill("#email", email) + page.fill("#password", password) + + logger.info("Submitting login...") + page.click("#btn-login") + + page.wait_for_timeout(3000) + + if "login" in page.url.lower(): + logger.error("Login failed (still on login page)") + return {"status": "error", "message": "Login failed"} + + logger.info(f"Login likely successful. URL: {page.url}") + + return {"status": "ok"} + + except PlaywrightTimeoutError as e: + logger.error(f"Timeout during login flow: {str(e)}") + return {"status": "error", "message": "Timeout during login"} + + except Exception as e: + logger.error(f"Unexpected error: {str(e)}") + return {"status": "error", "message": str(e)} + + finally: + browser.close() diff --git a/backend/pashub_fetcher/local_handler/docker-compose.yml b/backend/pashub_fetcher/local_handler/docker-compose.yml index 0ee53283..34ba9277 100644 --- a/backend/pashub_fetcher/local_handler/docker-compose.yml +++ b/backend/pashub_fetcher/local_handler/docker-compose.yml @@ -1,7 +1,7 @@ version: "3.9" services: - categorisation-lambda: + pashub-fetcher-lambda: build: context: ../../../ dockerfile: backend/pashub_fetcher/handler/Dockerfile From 4d641af0c135b11973dc29c118fabd715f205ea9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Mar 2026 09:36:57 +0000 Subject: [PATCH 05/17] extract token from localStorage after logging in --- backend/pashub_fetcher/handler/handler.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 3d22ef44..a5cbe55a 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -40,7 +40,19 @@ def handler(event: Mapping[str, Any], context: Any) -> Dict[str, str]: logger.info(f"Login likely successful. URL: {page.url}") - return {"status": "ok"} + token = page.evaluate( + """() => { + return localStorage.getItem('token'); + }""" + ) + + if not token: + logger.error("Login succeeded but no token found") + return {"status": "error", "message": "No token found"} + + logger.info(f"Token extracted successfully: {token}") + + return {"status": "ok", "token": token} except PlaywrightTimeoutError as e: logger.error(f"Timeout during login flow: {str(e)}") From b8c0c9ea653df2818aa75bf41af32e2783b39a5a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Mar 2026 09:56:53 +0000 Subject: [PATCH 06/17] move playwright process to separate file --- backend/pashub_fetcher/handler/handler.py | 69 ++++------------------- backend/pashub_fetcher/processor.py | 0 backend/pashub_fetcher/token_getter.py | 54 ++++++++++++++++++ 3 files changed, 65 insertions(+), 58 deletions(-) delete mode 100644 backend/pashub_fetcher/processor.py create mode 100644 backend/pashub_fetcher/token_getter.py diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index a5cbe55a..11f457a6 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,66 +1,19 @@ -from typing import Any, Dict, Mapping - -from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError +from typing import Any, Mapping +from backend.pashub_fetcher.token_getter import get_token_from_local_storage from utils.logger import setup_logger logger = setup_logger() -def handler(event: Mapping[str, Any], context: Any) -> Dict[str, str]: - logger.info("Starting Playwright flow") +def handler(event: Mapping[str, Any], context: Any) -> None: + pas_hub_email = "random@test.com" + pas_hub_password = "my_fake_password" - email = "random@test.com" - password = "my_fake_password" - - with sync_playwright() as p: - browser = p.chromium.launch( - headless=True, - args=["--no-sandbox", "--disable-dev-shm-usage"], - ) - page = browser.new_page() - - try: - logger.info("Navigating to site...") - page.goto("https://pashub.net/", timeout=30000) - - logger.info("Filling login form...") - page.fill("#email", email) - page.fill("#password", password) - - logger.info("Submitting login...") - page.click("#btn-login") - - page.wait_for_timeout(3000) - - if "login" in page.url.lower(): - logger.error("Login failed (still on login page)") - return {"status": "error", "message": "Login failed"} - - logger.info(f"Login likely successful. URL: {page.url}") - - token = page.evaluate( - """() => { - return localStorage.getItem('token'); - }""" - ) - - if not token: - logger.error("Login succeeded but no token found") - return {"status": "error", "message": "No token found"} - - logger.info(f"Token extracted successfully: {token}") - - return {"status": "ok", "token": token} - - except PlaywrightTimeoutError as e: - logger.error(f"Timeout during login flow: {str(e)}") - return {"status": "error", "message": "Timeout during login"} - - except Exception as e: - logger.error(f"Unexpected error: {str(e)}") - return {"status": "error", "message": str(e)} - - finally: - browser.close() + try: + token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password) + logger.info(f"Token extracted successfully: {token}") + except: + logger.error("Error getting auth token from Pas Hub") + raise diff --git a/backend/pashub_fetcher/processor.py b/backend/pashub_fetcher/processor.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/pashub_fetcher/token_getter.py b/backend/pashub_fetcher/token_getter.py new file mode 100644 index 00000000..d5481dd5 --- /dev/null +++ b/backend/pashub_fetcher/token_getter.py @@ -0,0 +1,54 @@ +from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError + +from utils.logger import setup_logger + +logger = setup_logger() + + +def get_token_from_local_storage(email: str, password: str) -> str: + logger.info("Starting Playwright flow") + + with sync_playwright() as p: + browser = p.chromium.launch( + headless=True, + args=["--no-sandbox", "--disable-dev-shm-usage"], + ) + page = browser.new_page() + + try: + logger.info("Navigating to site...") + page.goto("https://pashub.net/", timeout=30000) + + logger.info("Filling login form...") + page.fill("#email", email) + page.fill("#password", password) + + logger.info("Submitting login...") + page.click("#btn-login") + + page.wait_for_timeout(3000) + + if "login" in page.url.lower(): + raise Exception("Login failed (still on login page)") + + logger.info(f"Login likely successful. URL: {page.url}") + + token = page.evaluate( + """() => { + return localStorage.getItem('token'); + }""" + ) + + if not token: + raise Exception("Login succeeded but no token found") + + return token + + except PlaywrightTimeoutError as e: + raise Exception(f"Timeout during login flow: {str(e)}") + + except Exception as e: + raise Exception(f"Unexpected error: {str(e)}") + + finally: + browser.close() From cd514b6e5d630e4a9a665eee54b8a3994067c061 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Mar 2026 10:29:42 +0000 Subject: [PATCH 07/17] add and implement cotality_client --- backend/pashub_fetcher/handler/handler.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 11f457a6..a9288783 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,5 +1,6 @@ -from typing import Any, Mapping +from typing import Any, List, Mapping +from backend.pashub_fetcher.cotality_client import CotalityClient from backend.pashub_fetcher.token_getter import get_token_from_local_storage from utils.logger import setup_logger @@ -17,3 +18,9 @@ def handler(event: Mapping[str, Any], context: Any) -> None: except: logger.error("Error getting auth token from Pas Hub") raise + + client = CotalityClient(token=token) + uprn = "100061885568" # TODO: get from request body + + saved_files: List[str] = client.get_evidence_files(uprn=uprn) + print(f"saved {len(saved_files)} files") From 3dc14480e9dd7f41a175e5c9f511f979afc09703 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Mar 2026 10:31:30 +0000 Subject: [PATCH 08/17] rename evidence file getter method --- backend/pashub_fetcher/handler/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index a9288783..b5ec4320 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -22,5 +22,5 @@ def handler(event: Mapping[str, Any], context: Any) -> None: client = CotalityClient(token=token) uprn = "100061885568" # TODO: get from request body - saved_files: List[str] = client.get_evidence_files(uprn=uprn) + saved_files: List[str] = client.get_evidence_files_by_uprn(uprn=uprn) print(f"saved {len(saved_files)} files") From 6617d9e6145e9d903dafdbc6bdb70fc30488113e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Mar 2026 16:16:20 +0000 Subject: [PATCH 09/17] improved typing --- backend/pashub_fetcher/core_files.py | 13 +++ backend/pashub_fetcher/cotality_client.py | 110 +++++++++++++++++++ backend/pashub_fetcher/evidence_file_data.py | 25 +++++ backend/pashub_fetcher/evidence_metadata.py | 16 +++ scripts/download_cotality_evidence.py | 2 +- 5 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 backend/pashub_fetcher/core_files.py create mode 100644 backend/pashub_fetcher/cotality_client.py create mode 100644 backend/pashub_fetcher/evidence_file_data.py create mode 100644 backend/pashub_fetcher/evidence_metadata.py diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py new file mode 100644 index 00000000..82637f70 --- /dev/null +++ b/backend/pashub_fetcher/core_files.py @@ -0,0 +1,13 @@ +from enum import Enum + + +class CoreFiles(Enum): + PHOTOPACK = "Photopack" + SITENOTE = "SiteNote" + RDSAP_SITENOTE = "RdSAP_SiteNote" + PAS2023_VENTILATION = "PAS 2023 Ventilation Assessment Report" + PAS2023_CONDITION = "PAS 2023 Condition Report" + PAS_SIGNIFICANCE = "PAS Significance" + PAR_PHOTOPACK = "PAR Photo Pack" + PAS2023_PROPERTY = "PAS 2023 Property Assessment Report" + PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report" diff --git a/backend/pashub_fetcher/cotality_client.py b/backend/pashub_fetcher/cotality_client.py new file mode 100644 index 00000000..9deda776 --- /dev/null +++ b/backend/pashub_fetcher/cotality_client.py @@ -0,0 +1,110 @@ +from typing import List, Optional + +import requests + +from backend.pashub_fetcher.evidence_file_data import EvidenceFileData +from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata + + +class CotalityClient: + def __init__(self, token: str): + self.token = token + self.company_id = "cb5249e2-8f31-4ef4-aefd-08ddaccb1fa2" + self.base = "https://pashub.net/api" + + self.session = requests.Session() + self.session.headers.update( + { + "Authorization": f"Bearer {self.token}", + "Accept": "application/json", + } + ) + + def get_core_envidence_files_by_job_id(self, job_id: str) -> List[str]: + # url = f"{self.base}/jobs/{job_id}/evidence" + + raise NotImplementedError + + def get_evidence_files_by_uprn(self, uprn: str) -> List[str]: + """ + Download evidence files for the most recent job for a UPRN. + Returns a list of saved filenames. + """ + + job_id: Optional[str] = self._get_latest_job_id(uprn) + if not job_id: + return [] + + evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id) + if not evidence_list: + return [] + + saved_files: List[str] = [] + + for evidence in evidence_list: + evidence_id = evidence.file_id + if not evidence_id: + continue + + metadata: EvidenceMetadata = self._get_evidence_metadata( + job_id, evidence_id + ) + + download_url: str = self._build_download_url(metadata, evidence.file_id) + file_name = evidence.file_name + + self._download_file(download_url, file_name) + saved_files.append(file_name) + + return saved_files + + def _get_latest_job_id(self, uprn: str) -> Optional[str]: + url = f"{self.base}/jobs" + params = { + "pageIndex": 0, + "pageSize": 20, + "orderBy": "createdUtc", + "orderDesc": "true", + "addressUprn": uprn, + "companyId": self.company_id, + } + + r = self.session.get(url, params=params) + r.raise_for_status() + + jobs = r.json().get("results", []) + return jobs[0]["id"] if jobs else None + + def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]: + url = f"{self.base}/jobs/{job_id}/evidence" + + r = self.session.get(url) + r.raise_for_status() + + results = r.json().get("results", []) + + return [EvidenceFileData.from_api(item) for item in results] + + def _get_evidence_metadata(self, job_id: str, evidence_id: str) -> EvidenceMetadata: + url = f"{self.base}/jobs/{job_id}/evidenceMetadata" + params = {"evidenceIds": evidence_id} + + r = self.session.get(url, params=params) + r.raise_for_status() + + return EvidenceMetadata.from_api(r.json()) + + def _build_download_url(self, metadata: EvidenceMetadata, file_id: str) -> str: + container = metadata.container_name + blob_uri = metadata.blob_uri + + base, sas = blob_uri.split("?", 1) + + return f"{base}{container}/{file_id}?{sas}" + + def _download_file(self, url: str, file_name: str) -> None: + r = self.session.get(url) + r.raise_for_status() + + with open(file_name, "wb") as f: + f.write(r.content) diff --git a/backend/pashub_fetcher/evidence_file_data.py b/backend/pashub_fetcher/evidence_file_data.py new file mode 100644 index 00000000..8ecc2441 --- /dev/null +++ b/backend/pashub_fetcher/evidence_file_data.py @@ -0,0 +1,25 @@ +from __future__ import annotations +from dataclasses import dataclass +from typing import Any, Dict, Optional + + +@dataclass +class EvidenceFileData: + file_id: str + file_name: str + created_utc: str + file_size: int + file_extension: str + + evidence_category: Optional[str] = None + + @classmethod + def from_api(cls, data: Dict[str, Any]) -> EvidenceFileData: + return cls( + file_id=data["fileId"], + file_name=data["fileName"], + created_utc=data["createdUtc"], + file_size=data["fileSize"], + file_extension=data["fileExtension"], + evidence_category=data.get("evidenceCategory"), + ) diff --git a/backend/pashub_fetcher/evidence_metadata.py b/backend/pashub_fetcher/evidence_metadata.py new file mode 100644 index 00000000..e3a9536e --- /dev/null +++ b/backend/pashub_fetcher/evidence_metadata.py @@ -0,0 +1,16 @@ +from __future__ import annotations +from dataclasses import dataclass +from typing import Any, Dict + + +@dataclass +class EvidenceMetadata: + container_name: str + blob_uri: str + + @classmethod + def from_api(cls, data: Dict[str, Any]) -> EvidenceMetadata: + return cls( + container_name=data["containerName"], + blob_uri=data["blobUri"], + ) diff --git a/scripts/download_cotality_evidence.py b/scripts/download_cotality_evidence.py index 43f9afea..93148a3a 100644 --- a/scripts/download_cotality_evidence.py +++ b/scripts/download_cotality_evidence.py @@ -1,7 +1,7 @@ import requests import json -TOKEN = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6Ik1EUTRNRU5GUTBVNU9FUXpOelk1TVRFME0wUkdOMFpFUkRoR1JVVkJNVGMxT1RFNFJERXlPQSJ9.eyJodHRwOi8vZW1haWwiOiJzZWJhc3RpYW5Ab3Ntb3Npcy1hY2QuY29tIiwiaHR0cDovL2NsdWsudG9rZW4vbGFzdFBhc3N3b3JkQ2hhbmdlIjoiMjAyNS0wOC0yNlQwOTo1NDoyNi4zMjZaIiwiaHR0cDovL2NsdWsudG9rZW4vY29ubmVjdGlvbiI6ImVUZWNoSUQiLCJodHRwOi8vY2x1ay50b2tlbi9zdHJhdGVneSI6ImF1dGgwIiwiaHR0cDovL2NsdWsudG9rZW4vc3RyYXRlZ3lUeXBlIjoiZGF0YWJhc2UiLCJpc3MiOiJodHRwczovL2V0ZWNoaWQuZXUuYXV0aDAuY29tLyIsInN1YiI6ImF1dGgwfDY4YWQ4NDUyZDI2YzI1ZmMyMzkwZmYxYSIsImF1ZCI6WyJodHRwczovL3Bhc2h1Yi5hcGkuZXRlY2gubmV0IiwiaHR0cHM6Ly9ldGVjaGlkLmV1LmF1dGgwLmNvbS91c2VyaW5mbyJdLCJpYXQiOjE3NzMyMzc4MjQsImV4cCI6MTc3MzI0NTAyNCwic2NvcGUiOiJvcGVuaWQiLCJhenAiOiJEaVp6d3VVaTVkVmozOXR3NG00bWZ6emZvRm5MdmVLZyJ9.mkkxeZiD_ByHY4TJKpLQ-trmeGs15s0ekL6u1n-ek9j-EzNyf6qalEHCyHf8gzdNhU_vay96bIOMRHp4vXFaLqSANwKZayIS3EoA_b9-u2FAZpooxEvReAMNJGoZ6WLD01AQXWv-l7ww1ZqAnQzw0moL_Oma6hVmA5oa-RJKJ3MerS7e0Wei97Db48E140-EAbQf2iPcKYYtCNRA4il6n8DFiqGeoUMGo99jkR1ceZAvMpOAj8RhKX-4qSiDfX6yXUS2G96U5m7S_GWI-DEj5TazkN10Af3TyOY3EVjmZoJcRpiAR4cFmlfcTydjrShU03DWmPZm1QItf2McxfCpNA" +TOKEN = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6Ik1EUTRNRU5GUTBVNU9FUXpOelk1TVRFME0wUkdOMFpFUkRoR1JVVkJNVGMxT1RFNFJERXlPQSJ9.eyJodHRwOi8vZW1haWwiOiJzZWJhc3RpYW5Ab3Ntb3Npcy1hY2QuY29tIiwiaHR0cDovL2NsdWsudG9rZW4vbGFzdFBhc3N3b3JkQ2hhbmdlIjoiMjAyNS0wOC0yNlQwOTo1NDoyNi4zMjZaIiwiaHR0cDovL2NsdWsudG9rZW4vY29ubmVjdGlvbiI6ImVUZWNoSUQiLCJodHRwOi8vY2x1ay50b2tlbi9zdHJhdGVneSI6ImF1dGgwIiwiaHR0cDovL2NsdWsudG9rZW4vc3RyYXRlZ3lUeXBlIjoiZGF0YWJhc2UiLCJpc3MiOiJodHRwczovL2V0ZWNoaWQuZXUuYXV0aDAuY29tLyIsInN1YiI6ImF1dGgwfDY4YWQ4NDUyZDI2YzI1ZmMyMzkwZmYxYSIsImF1ZCI6WyJodHRwczovL3Bhc2h1Yi5hcGkuZXRlY2gubmV0IiwiaHR0cHM6Ly9ldGVjaGlkLmV1LmF1dGgwLmNvbS91c2VyaW5mbyJdLCJpYXQiOjE3NzQyNzg3NjIsImV4cCI6MTc3NDI4NTk2Miwic2NvcGUiOiJvcGVuaWQiLCJhenAiOiJEaVp6d3VVaTVkVmozOXR3NG00bWZ6emZvRm5MdmVLZyJ9.ESIbau52J7KXL22tM8GlO9eV0f0pCOFdoQGL2YcjsTEcSeucHBuI9lHXT2dNJn0E8qlgafjazaMkoMs2g0TiTUUZU6XsKqKpUAJy4kk-qKp53V5az7e2MG9uDSa5bB1vWsQQw37zaNVQ0FQkpYHSiFeGoBh1PjuKwCpLjbl94bx7S4bQKaJSZRUj5TS75k6HnSOhUtN9LYLMPRoLty7TwqFLDxgj8Ixl_ddEF3C3Y6Mcxa5UF57BNTnFXmLefqsryex0XV4b5Btu4W5wZ4bjhX2M7PSXbk4lTv1YZdQxWLpzvNpEVnFueawtqedGYipqH1v4bg99YUnXDbajd2SSVQ" base = "https://pashub.net/api" From 609468cff961993a731668cebb8505e11a8f6fed Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Mar 2026 08:47:59 +0000 Subject: [PATCH 10/17] new methods for downloading all core files for pashub URL. Download currently not being authorised --- backend/pashub_fetcher/cotality_client.py | 77 ++++++++++++++--------- backend/pashub_fetcher/handler/Dockerfile | 2 +- backend/pashub_fetcher/handler/handler.py | 6 +- scripts/download_cotality_evidence.py | 2 +- 4 files changed, 53 insertions(+), 34 deletions(-) diff --git a/backend/pashub_fetcher/cotality_client.py b/backend/pashub_fetcher/cotality_client.py index 9deda776..0ae473b7 100644 --- a/backend/pashub_fetcher/cotality_client.py +++ b/backend/pashub_fetcher/cotality_client.py @@ -1,13 +1,23 @@ -from typing import List, Optional +from collections import defaultdict +from typing import Dict, List, Optional +from datetime import datetime, timezone import requests +from backend.pashub_fetcher.core_files import CoreFiles from backend.pashub_fetcher.evidence_file_data import EvidenceFileData from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata +from utils.logger import setup_logger + + +logger = setup_logger() class CotalityClient: def __init__(self, token: str): + + logger.info(f"Container UTC time: {datetime.now(timezone.utc)}") + self.token = token self.company_id = "cb5249e2-8f31-4ef4-aefd-08ddaccb1fa2" self.base = "https://pashub.net/api" @@ -19,61 +29,68 @@ class CotalityClient: "Accept": "application/json", } ) + logger.info("Finished initialising CotalityClient") def get_core_envidence_files_by_job_id(self, job_id: str) -> List[str]: - # url = f"{self.base}/jobs/{job_id}/evidence" - - raise NotImplementedError - - def get_evidence_files_by_uprn(self, uprn: str) -> List[str]: - """ - Download evidence files for the most recent job for a UPRN. - Returns a list of saved filenames. - """ - - job_id: Optional[str] = self._get_latest_job_id(uprn) - if not job_id: - return [] - + logger.info(f"Getting Core Evidence Files for job ID {job_id}") evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id) + logger.info(f"Found {len(evidence_list)} Evidence files to get") if not evidence_list: return [] saved_files: List[str] = [] - for evidence in evidence_list: + core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files( + evidence_list + ) + + logger.info(f"Number of core files to download is {len(core_files)}") + + for _, evidence in core_files.items(): evidence_id = evidence.file_id if not evidence_id: continue + logger.info(f"Getting metadata for file {evidence.file_name}") metadata: EvidenceMetadata = self._get_evidence_metadata( job_id, evidence_id ) download_url: str = self._build_download_url(metadata, evidence.file_id) + logger.info(f"Download URL: {download_url}") file_name = evidence.file_name self._download_file(download_url, file_name) + logger.info("Successfully downloaded file") saved_files.append(file_name) return saved_files - def _get_latest_job_id(self, uprn: str) -> Optional[str]: - url = f"{self.base}/jobs" - params = { - "pageIndex": 0, - "pageSize": 20, - "orderBy": "createdUtc", - "orderDesc": "true", - "addressUprn": uprn, - "companyId": self.company_id, - } + def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]: + for core_file in CoreFiles: + if file.file_name.startswith(core_file.value): + return core_file + return None - r = self.session.get(url, params=params) - r.raise_for_status() + def _select_latest_core_files( + self, + files: List[EvidenceFileData], + ) -> Dict[CoreFiles, EvidenceFileData]: + grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list) - jobs = r.json().get("results", []) - return jobs[0]["id"] if jobs else None + for file in files: + core_type = self._get_core_file_type(file) + if not core_type: + continue + grouped[core_type].append(file) + + latest_files: Dict[CoreFiles, EvidenceFileData] = {} + + for core_type, group in grouped.items(): + latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc)) + latest_files[core_type] = latest + + return latest_files def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]: url = f"{self.base}/jobs/{job_id}/evidence" diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile index 2128d12c..cbd3c228 100644 --- a/backend/pashub_fetcher/handler/Dockerfile +++ b/backend/pashub_fetcher/handler/Dockerfile @@ -5,7 +5,7 @@ ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest RUN chmod +x /usr/local/bin/aws-lambda-rie # Install Lambda runtime client -RUN pip install awslambdaric playwright==1.58.0 +RUN pip install awslambdaric playwright==1.58.0 requests # Set working directory (Lambda task root) WORKDIR /var/task diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index b5ec4320..053ad2f4 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -20,7 +20,9 @@ def handler(event: Mapping[str, Any], context: Any) -> None: raise client = CotalityClient(token=token) - uprn = "100061885568" # TODO: get from request body + # uprn = "100061885568" # TODO: get from request body + job_id = "5abf6e27-e4c4-4ba8-b69d-9e34939e0002" # TODO: get from request body - saved_files: List[str] = client.get_evidence_files_by_uprn(uprn=uprn) + saved_files: List[str] = client.get_core_envidence_files_by_job_id(job_id) + # saved_files: List[str] = client.get_evidence_files_by_uprn(uprn=uprn) print(f"saved {len(saved_files)} files") diff --git a/scripts/download_cotality_evidence.py b/scripts/download_cotality_evidence.py index 93148a3a..76400a99 100644 --- a/scripts/download_cotality_evidence.py +++ b/scripts/download_cotality_evidence.py @@ -1,7 +1,7 @@ import requests import json -TOKEN = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6Ik1EUTRNRU5GUTBVNU9FUXpOelk1TVRFME0wUkdOMFpFUkRoR1JVVkJNVGMxT1RFNFJERXlPQSJ9.eyJodHRwOi8vZW1haWwiOiJzZWJhc3RpYW5Ab3Ntb3Npcy1hY2QuY29tIiwiaHR0cDovL2NsdWsudG9rZW4vbGFzdFBhc3N3b3JkQ2hhbmdlIjoiMjAyNS0wOC0yNlQwOTo1NDoyNi4zMjZaIiwiaHR0cDovL2NsdWsudG9rZW4vY29ubmVjdGlvbiI6ImVUZWNoSUQiLCJodHRwOi8vY2x1ay50b2tlbi9zdHJhdGVneSI6ImF1dGgwIiwiaHR0cDovL2NsdWsudG9rZW4vc3RyYXRlZ3lUeXBlIjoiZGF0YWJhc2UiLCJpc3MiOiJodHRwczovL2V0ZWNoaWQuZXUuYXV0aDAuY29tLyIsInN1YiI6ImF1dGgwfDY4YWQ4NDUyZDI2YzI1ZmMyMzkwZmYxYSIsImF1ZCI6WyJodHRwczovL3Bhc2h1Yi5hcGkuZXRlY2gubmV0IiwiaHR0cHM6Ly9ldGVjaGlkLmV1LmF1dGgwLmNvbS91c2VyaW5mbyJdLCJpYXQiOjE3NzQyNzg3NjIsImV4cCI6MTc3NDI4NTk2Miwic2NvcGUiOiJvcGVuaWQiLCJhenAiOiJEaVp6d3VVaTVkVmozOXR3NG00bWZ6emZvRm5MdmVLZyJ9.ESIbau52J7KXL22tM8GlO9eV0f0pCOFdoQGL2YcjsTEcSeucHBuI9lHXT2dNJn0E8qlgafjazaMkoMs2g0TiTUUZU6XsKqKpUAJy4kk-qKp53V5az7e2MG9uDSa5bB1vWsQQw37zaNVQ0FQkpYHSiFeGoBh1PjuKwCpLjbl94bx7S4bQKaJSZRUj5TS75k6HnSOhUtN9LYLMPRoLty7TwqFLDxgj8Ixl_ddEF3C3Y6Mcxa5UF57BNTnFXmLefqsryex0XV4b5Btu4W5wZ4bjhX2M7PSXbk4lTv1YZdQxWLpzvNpEVnFueawtqedGYipqH1v4bg99YUnXDbajd2SSVQ" +TOKEN = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6Ik1EUTRNRU5GUTBVNU9FUXpOelk1TVRFME0wUkdOMFpFUkRoR1JVVkJNVGMxT1RFNFJERXlPQSJ9.eyJodHRwOi8vZW1haWwiOiJzZWJhc3RpYW5Ab3Ntb3Npcy1hY2QuY29tIiwiaHR0cDovL2NsdWsudG9rZW4vbGFzdFBhc3N3b3JkQ2hhbmdlIjoiMjAyNS0wOC0yNlQwOTo1NDoyNi4zMjZaIiwiaHR0cDovL2NsdWsudG9rZW4vY29ubmVjdGlvbiI6ImVUZWNoSUQiLCJodHRwOi8vY2x1ay50b2tlbi9zdHJhdGVneSI6ImF1dGgwIiwiaHR0cDovL2NsdWsudG9rZW4vc3RyYXRlZ3lUeXBlIjoiZGF0YWJhc2UiLCJpc3MiOiJodHRwczovL2V0ZWNoaWQuZXUuYXV0aDAuY29tLyIsInN1YiI6ImF1dGgwfDY4YWQ4NDUyZDI2YzI1ZmMyMzkwZmYxYSIsImF1ZCI6WyJodHRwczovL3Bhc2h1Yi5hcGkuZXRlY2gubmV0IiwiaHR0cHM6Ly9ldGVjaGlkLmV1LmF1dGgwLmNvbS91c2VyaW5mbyJdLCJpYXQiOjE3NzQyODczOTMsImV4cCI6MTc3NDI5NDU5Mywic2NvcGUiOiJvcGVuaWQiLCJhenAiOiJEaVp6d3VVaTVkVmozOXR3NG00bWZ6emZvRm5MdmVLZyJ9.NHh21XfnRofsFkRkc-28Dz-vQAdY70lXkEmh-Mzz7Fg6gjDbZeMu7PnBwgbDP_U8r6R0mI_pDIUc1MzJe1Rf5SF2-RV36TcGzmVzb3ek9wPsy3lxST5WL-vn-qUJ7GsZiGOeQ-jDLLFn8b8tjFrD7BGv8uphrfYAbPDm0atznkdbUSQQy-rfRJWhisnDtHf99j96TuJz3dV4bfI6VGrin-jezbg6BCvUYWQtttUs7knQKEWO0sGGDxtS29sbn4MX8Jqz4-hf6N2XSlgv52aIDwTVX-lyMWzfoeuIGhvCKuDiJeVw2c0r2UZFpHqjnfhXcb0_aacukXe8z-srj8-Rdw" base = "https://pashub.net/api" From ab40bca32b216583476214d78c092b661de57da7 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Mar 2026 08:58:34 +0000 Subject: [PATCH 11/17] download using requests rather than self.session --- backend/pashub_fetcher/cotality_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/cotality_client.py b/backend/pashub_fetcher/cotality_client.py index 0ae473b7..a9e991bf 100644 --- a/backend/pashub_fetcher/cotality_client.py +++ b/backend/pashub_fetcher/cotality_client.py @@ -120,7 +120,7 @@ class CotalityClient: return f"{base}{container}/{file_id}?{sas}" def _download_file(self, url: str, file_name: str) -> None: - r = self.session.get(url) + r = requests.get(url) r.raise_for_status() with open(file_name, "wb") as f: From ff2df292a861e85ef92e3abfcc114d524a56534f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Mar 2026 09:05:48 +0000 Subject: [PATCH 12/17] remove unused logs and commented lines --- backend/pashub_fetcher/cotality_client.py | 3 --- backend/pashub_fetcher/handler/handler.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/backend/pashub_fetcher/cotality_client.py b/backend/pashub_fetcher/cotality_client.py index a9e991bf..933e2829 100644 --- a/backend/pashub_fetcher/cotality_client.py +++ b/backend/pashub_fetcher/cotality_client.py @@ -16,8 +16,6 @@ logger = setup_logger() class CotalityClient: def __init__(self, token: str): - logger.info(f"Container UTC time: {datetime.now(timezone.utc)}") - self.token = token self.company_id = "cb5249e2-8f31-4ef4-aefd-08ddaccb1fa2" self.base = "https://pashub.net/api" @@ -57,7 +55,6 @@ class CotalityClient: ) download_url: str = self._build_download_url(metadata, evidence.file_id) - logger.info(f"Download URL: {download_url}") file_name = evidence.file_name self._download_file(download_url, file_name) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 053ad2f4..971cd6cd 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -20,9 +20,7 @@ def handler(event: Mapping[str, Any], context: Any) -> None: raise client = CotalityClient(token=token) - # uprn = "100061885568" # TODO: get from request body job_id = "5abf6e27-e4c4-4ba8-b69d-9e34939e0002" # TODO: get from request body saved_files: List[str] = client.get_core_envidence_files_by_job_id(job_id) - # saved_files: List[str] = client.get_evidence_files_by_uprn(uprn=uprn) print(f"saved {len(saved_files)} files") From 54c05096e8102e9d8fbd6c281054aea293b058e2 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Mar 2026 09:08:51 +0000 Subject: [PATCH 13/17] delete unused import --- backend/pashub_fetcher/cotality_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/cotality_client.py b/backend/pashub_fetcher/cotality_client.py index 933e2829..e32ec7eb 100644 --- a/backend/pashub_fetcher/cotality_client.py +++ b/backend/pashub_fetcher/cotality_client.py @@ -1,6 +1,6 @@ from collections import defaultdict from typing import Dict, List, Optional -from datetime import datetime, timezone +from datetime import datetime import requests From 53502c28a80b748062e140dbbd0be7c0b6b1896b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Mar 2026 09:17:05 +0000 Subject: [PATCH 14/17] handle multiple jobs in a single call --- backend/pashub_fetcher/handler/handler.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 971cd6cd..2a39db60 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,3 +1,4 @@ +import time from typing import Any, List, Mapping from backend.pashub_fetcher.cotality_client import CotalityClient @@ -20,7 +21,15 @@ def handler(event: Mapping[str, Any], context: Any) -> None: raise client = CotalityClient(token=token) - job_id = "5abf6e27-e4c4-4ba8-b69d-9e34939e0002" # TODO: get from request body - saved_files: List[str] = client.get_core_envidence_files_by_job_id(job_id) + jobs = [ + "5abf6e27-e4c4-4ba8-b69d-9e34939e0002", + "047f4455-85e2-4293-97b1-6b460137d33e", + ] # TODO: get these from request body + + saved_files: List[str] = [] + for job_id in jobs: + saved_files.extend(client.get_core_envidence_files_by_job_id(job_id)) + time.sleep(10) # Simulate manual download + print(f"saved {len(saved_files)} files") From 7c0a947bf4a6feee8c5d59600215418122c742e5 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Mar 2026 09:22:52 +0000 Subject: [PATCH 15/17] try regetting token once if auth error during download --- backend/pashub_fetcher/cotality_client.py | 12 ++++++++++++ backend/pashub_fetcher/handler/handler.py | 16 ++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/backend/pashub_fetcher/cotality_client.py b/backend/pashub_fetcher/cotality_client.py index e32ec7eb..db7b8bd4 100644 --- a/backend/pashub_fetcher/cotality_client.py +++ b/backend/pashub_fetcher/cotality_client.py @@ -13,6 +13,10 @@ from utils.logger import setup_logger logger = setup_logger() +class UnauthorizedError(Exception): + pass + + class CotalityClient: def __init__(self, token: str): @@ -93,6 +97,10 @@ class CotalityClient: url = f"{self.base}/jobs/{job_id}/evidence" r = self.session.get(url) + + if r.status_code == 401: + raise UnauthorizedError("Token expired or invalid") + r.raise_for_status() results = r.json().get("results", []) @@ -118,6 +126,10 @@ class CotalityClient: def _download_file(self, url: str, file_name: str) -> None: r = requests.get(url) + + if r.status_code == 401: + raise UnauthorizedError() + r.raise_for_status() with open(file_name, "wb") as f: diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 2a39db60..47e45714 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,7 +1,7 @@ import time from typing import Any, List, Mapping -from backend.pashub_fetcher.cotality_client import CotalityClient +from backend.pashub_fetcher.cotality_client import CotalityClient, UnauthorizedError from backend.pashub_fetcher.token_getter import get_token_from_local_storage from utils.logger import setup_logger @@ -29,7 +29,19 @@ def handler(event: Mapping[str, Any], context: Any) -> None: saved_files: List[str] = [] for job_id in jobs: - saved_files.extend(client.get_core_envidence_files_by_job_id(job_id)) + try: + saved_files.extend(client.get_core_envidence_files_by_job_id(job_id)) + + except UnauthorizedError: + logger.warning("Token expired — refreshing") + + token = get_token_from_local_storage(pas_hub_email, pas_hub_password) + + client = CotalityClient(token=token) + + # retry once + saved_files.extend(client.get_core_envidence_files_by_job_id(job_id)) + time.sleep(10) # Simulate manual download print(f"saved {len(saved_files)} files") From 06e0775904eb2dc9249ed9dc5913f5747c8908ac Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Mar 2026 09:23:39 +0000 Subject: [PATCH 16/17] try regetting token once if auth error during download --- backend/pashub_fetcher/cotality_client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/pashub_fetcher/cotality_client.py b/backend/pashub_fetcher/cotality_client.py index db7b8bd4..b4a30dc2 100644 --- a/backend/pashub_fetcher/cotality_client.py +++ b/backend/pashub_fetcher/cotality_client.py @@ -97,7 +97,6 @@ class CotalityClient: url = f"{self.base}/jobs/{job_id}/evidence" r = self.session.get(url) - if r.status_code == 401: raise UnauthorizedError("Token expired or invalid") @@ -112,6 +111,9 @@ class CotalityClient: params = {"evidenceIds": evidence_id} r = self.session.get(url, params=params) + if r.status_code == 401: + raise UnauthorizedError() + r.raise_for_status() return EvidenceMetadata.from_api(r.json()) @@ -126,7 +128,6 @@ class CotalityClient: def _download_file(self, url: str, file_name: str) -> None: r = requests.get(url) - if r.status_code == 401: raise UnauthorizedError() From 21f5cd40da8af0a648b68e0d39a06bb0238581ca Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Mar 2026 09:53:12 +0000 Subject: [PATCH 17/17] remove token from log --- backend/pashub_fetcher/handler/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 47e45714..df187f3e 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -15,7 +15,7 @@ def handler(event: Mapping[str, Any], context: Any) -> None: try: token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password) - logger.info(f"Token extracted successfully: {token}") + logger.info(f"Token extracted successfully") except: logger.error("Error getting auth token from Pas Hub") raise