from enum import Enum, unique


@unique
class CoreFiles(Enum):
    """Core evidence document types, identified by file-name prefix.

    Each member's value is the leading text of a matching evidence file
    name. The fetcher classifies a file by testing
    ``file_name.startswith(member.value)`` against the members in
    declaration order and taking the first hit, so:

    * values must be distinct -- ``@unique`` enforces this, because a
      duplicated value would silently become an alias and one document type
      would never be selected;
    * no value should be a prefix of a later one, otherwise the earlier
      member shadows the later one.
    """

    PHOTOPACK = "Photopack"
    SITENOTE = "SiteNote"
    RDSAP_SITENOTE = "RdSAP_SiteNote"
    PAS2023_VENTILATION = "PAS 2023 Ventilation Assessment Report"
    PAS2023_CONDITION = "PAS 2023 Condition Report"
    PAS_SIGNIFICANCE = "PAS Significance"
    PAR_PHOTOPACK = "PAR Photo Pack"
    PAS2023_PROPERTY = "PAS 2023 Property Assessment Report"
    PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report"
class UnauthorizedError(Exception):
    """Raised when the remote service answers a request with HTTP 401."""


class CotalityClient:
    """Download the newest copy of each core evidence file for a PAS Hub job.

    PAS Hub API calls go through a ``requests.Session`` that carries the
    bearer token. The file download itself uses the URL assembled from the
    evidence metadata and is sent without that session.
    """

    # ``requests`` waits indefinitely unless told otherwise, so every HTTP
    # call passes this timeout (seconds) to avoid hanging on a stalled host.
    REQUEST_TIMEOUT = 30

    def __init__(self, token: str):
        """Create a session that sends *token* as a bearer credential."""
        self.token = token
        # NOTE(review): company_id is stored but not used by any request in
        # this class -- confirm whether an endpoint is meant to include it.
        self.company_id = "cb5249e2-8f31-4ef4-aefd-08ddaccb1fa2"
        self.base = "https://pashub.net/api"

        self.session = requests.Session()
        self.session.headers.update(
            {
                "Authorization": f"Bearer {self.token}",
                "Accept": "application/json",
            }
        )
        logger.info("Finished initialising CotalityClient")

    def get_core_envidence_files_by_job_id(self, job_id: str) -> List[str]:
        """Download the latest file of every core type for *job_id*.

        Files are written to the current working directory under the base
        name of the remote file name.

        Returns:
            The local names of the files that were written, in download
            order; an empty list when the job has no evidence.

        Raises:
            UnauthorizedError: if any request is answered with HTTP 401.
            requests.HTTPError: for any other non-success response.
            ValueError: if a remote file name has no usable base name.
        """
        logger.info(f"Getting Core Evidence Files for job ID {job_id}")
        evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id)
        logger.info(f"Found {len(evidence_list)} Evidence files to get")
        if not evidence_list:
            return []

        core_files = self._select_latest_core_files(evidence_list)
        logger.info(f"Number of core files to download is {len(core_files)}")

        saved_files: List[str] = []
        for evidence in core_files.values():
            evidence_id = evidence.file_id
            if not evidence_id:
                continue

            logger.info(f"Getting metadata for file {evidence.file_name}")
            metadata = self._get_evidence_metadata(job_id, evidence_id)
            download_url = self._build_download_url(metadata, evidence_id)

            # The name comes from the remote API; strip any directory part so
            # it cannot steer the write outside the working directory.
            local_name = self._local_file_name(evidence.file_name)
            self._download_file(download_url, local_name)
            logger.info("Successfully downloaded file")
            saved_files.append(local_name)

        return saved_files

    # Correctly spelled alias. The original (misspelled) name stays so that
    # existing callers keep working.
    get_core_evidence_files_by_job_id = get_core_envidence_files_by_job_id

    @staticmethod
    def _raise_if_unauthorized(response) -> None:
        """Raise ``UnauthorizedError`` when *response* has status 401."""
        if response.status_code == 401:
            raise UnauthorizedError("Token expired or invalid")

    @staticmethod
    def _parse_created_utc(value: str) -> datetime:
        """Parse an ISO-8601 timestamp into a timezone-aware ``datetime``.

        ``datetime.fromisoformat`` only accepts a trailing ``Z`` and
        fractional seconds of arbitrary length from Python 3.11 onwards, so
        both are normalised first. Timestamps without an offset are taken to
        be UTC (the field is named ``created_utc``), which keeps every value
        comparable with every other one.
        """
        import re
        from datetime import timezone

        text = value.strip()
        if text[-1:] in ("Z", "z"):
            text = text[:-1] + "+00:00"
        # Force the fractional part to exactly six digits.
        text = re.sub(
            r"\.(\d+)",
            lambda m: "." + m.group(1)[:6].ljust(6, "0"),
            text,
            count=1,
        )
        parsed = datetime.fromisoformat(text)
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    @staticmethod
    def _local_file_name(file_name: str) -> str:
        """Return the base name of *file_name* with any directories removed.

        Raises:
            ValueError: if nothing usable is left (empty, ``.`` or ``..``).
        """
        name = file_name.replace("\\", "/").rsplit("/", 1)[-1]
        if name in ("", ".", ".."):
            raise ValueError(f"Unusable evidence file name: {file_name!r}")
        return name

    def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]:
        """Return the first ``CoreFiles`` member whose value prefixes the name."""
        return next(
            (
                core_file
                for core_file in CoreFiles
                if file.file_name.startswith(core_file.value)
            ),
            None,
        )

    def _select_latest_core_files(
        self,
        files: List[EvidenceFileData],
    ) -> Dict[CoreFiles, EvidenceFileData]:
        """Pick, per core type, the file with the newest ``created_utc``.

        Files that match no core type are ignored. On equal timestamps the
        file that appears first in *files* wins.
        """
        grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)
        for file in files:
            core_type = self._get_core_file_type(file)
            if core_type is not None:
                grouped[core_type].append(file)

        return {
            core_type: max(
                group, key=lambda f: self._parse_created_utc(f.created_utc)
            )
            for core_type, group in grouped.items()
        }

    def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]:
        """Fetch the evidence entries listed under ``results`` for *job_id*."""
        url = f"{self.base}/jobs/{job_id}/evidence"

        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        self._raise_if_unauthorized(response)
        response.raise_for_status()

        # ``or []`` also covers an explicit ``"results": null``.
        results = response.json().get("results") or []
        return [EvidenceFileData.from_api(item) for item in results]

    def _get_evidence_metadata(self, job_id: str, evidence_id: str) -> EvidenceMetadata:
        """Fetch the storage metadata for one evidence file of *job_id*."""
        url = f"{self.base}/jobs/{job_id}/evidenceMetadata"
        params = {"evidenceIds": evidence_id}

        response = self.session.get(
            url, params=params, timeout=self.REQUEST_TIMEOUT
        )
        self._raise_if_unauthorized(response)
        response.raise_for_status()

        return EvidenceMetadata.from_api(response.json())

    def _build_download_url(self, metadata: EvidenceMetadata, file_id: str) -> str:
        """Combine blob URI, container name and file id into a download URL.

        The query string of ``metadata.blob_uri`` (if any) is moved to the
        end of the resulting URL. Slashes between the parts are normalised,
        so the base may or may not end with ``/``.
        """
        base, _, query = metadata.blob_uri.partition("?")
        container = metadata.container_name.strip("/")

        url = f"{base.rstrip('/')}/{container}/{file_id}"
        return f"{url}?{query}" if query else url

    def _download_file(self, url: str, file_name: str) -> None:
        """Download *url* and write the body to *file_name*.

        The request deliberately bypasses ``self.session``, so the PAS Hub
        bearer header is not sent along with it. The file is only opened
        after the whole body has arrived, so a failed request leaves no
        partial file behind.
        """
        with requests.get(url, timeout=self.REQUEST_TIMEOUT) as response:
            self._raise_if_unauthorized(response)
            response.raise_for_status()
            payload = response.content

        with open(file_name, "wb") as out:
            out.write(payload)
@dataclass
class EvidenceFileData:
    """One evidence file entry as described by the evidence listing."""

    file_id: str
    file_name: str
    created_utc: str
    file_size: int
    file_extension: str

    # Taken from the optional "evidenceCategory" key; None when absent.
    evidence_category: Optional[str] = None

    @classmethod
    def from_api(cls, data: Dict[str, Any]) -> EvidenceFileData:
        """Build an instance from a camelCase API dictionary.

        Raises:
            KeyError: if any key other than ``evidenceCategory`` is missing.
        """
        # (attribute, API key) pairs for the mandatory fields, read in order.
        mandatory = (
            ("file_id", "fileId"),
            ("file_name", "fileName"),
            ("created_utc", "createdUtc"),
            ("file_size", "fileSize"),
            ("file_extension", "fileExtension"),
        )
        fields = {attr: data[key] for attr, key in mandatory}
        return cls(evidence_category=data.get("evidenceCategory"), **fields)


@dataclass
class EvidenceMetadata:
    """Storage location details for a single evidence file."""

    container_name: str
    blob_uri: str

    @classmethod
    def from_api(cls, data: Dict[str, Any]) -> EvidenceMetadata:
        """Build an instance from the ``containerName`` and ``blobUri`` keys.

        Raises:
            KeyError: if either key is missing.
        """
        container = data["containerName"]
        uri = data["blobUri"]
        return cls(container, uri)
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Lambda entry point: download core evidence files for a list of jobs.

    Logs in to PAS Hub to obtain a token, then fetches the core evidence
    files of every job in turn. If a job fails with ``UnauthorizedError``
    the token is refreshed and that job is retried once; a second failure
    propagates to the caller.

    Args:
        event: Invocation payload. Currently unused -- the job ids are
            hard-coded below.
        context: Lambda context object. Unused.

    Raises:
        Exception: whatever the token lookup or the client raises.
    """
    # NOTE(review): placeholder credentials -- real values should come from
    # configuration or a secret store, not from source code.
    pas_hub_email = "random@test.com"
    pas_hub_password = "my_fake_password"

    # Pause between consecutive jobs (seconds), to simulate manual download.
    pause_between_jobs = 10

    try:
        token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password)
    except Exception:
        logger.error("Error getting auth token from Pas Hub")
        raise
    logger.info("Token extracted successfully")

    client = CotalityClient(token=token)

    jobs = [
        "5abf6e27-e4c4-4ba8-b69d-9e34939e0002",
        "047f4455-85e2-4293-97b1-6b460137d33e",
    ]  # TODO: get these from request body

    saved_files: List[str] = []
    for position, job_id in enumerate(jobs):
        # Sleep only *between* jobs; pausing after the last one would just
        # add idle time to the invocation.
        if position:
            time.sleep(pause_between_jobs)

        try:
            saved_files.extend(client.get_core_envidence_files_by_job_id(job_id))
        except UnauthorizedError:
            logger.warning("Token expired — refreshing")

            token = get_token_from_local_storage(pas_hub_email, pas_hub_password)
            client = CotalityClient(token=token)

            # Retry once; a second UnauthorizedError is allowed to propagate.
            saved_files.extend(client.get_core_envidence_files_by_job_id(job_id))

    logger.info(f"saved {len(saved_files)} files")
class TokenRetrievalError(Exception):
    """Raised when the PAS Hub login flow does not yield a token."""


def get_token_from_local_storage(email: str, password: str) -> str:
    """Log in to PAS Hub in a headless browser and return the stored token.

    The function opens https://pashub.net/, fills in the login form, submits
    it, and reads the ``token`` entry from the page's ``localStorage``.

    Args:
        email: Value typed into the ``#email`` field.
        password: Value typed into the ``#password`` field.

    Returns:
        The token string found in ``localStorage``.

    Raises:
        TokenRetrievalError: if the page is still on a login URL after
            submitting, if no token is stored, if a Playwright timeout
            occurs, or if any other error is raised during the flow. The
            original exception is attached as ``__cause__`` in the last two
            cases. ``TokenRetrievalError`` derives from ``Exception``, so
            existing ``except Exception`` callers are unaffected.
    """
    logger.info("Starting Playwright flow")

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )

        try:
            # Created inside the try so the browser is closed even when
            # opening the page fails.
            page = browser.new_page()

            logger.info("Navigating to site...")
            page.goto("https://pashub.net/", timeout=30000)

            logger.info("Filling login form...")
            page.fill("#email", email)
            page.fill("#password", password)

            logger.info("Submitting login...")
            page.click("#btn-login")

            # Fixed 3 s pause to let the post-login navigation settle.
            # NOTE(review): waiting on an explicit condition would be less
            # timing-sensitive -- confirm what the site exposes.
            page.wait_for_timeout(3000)

            if "login" in page.url.lower():
                raise TokenRetrievalError("Login failed (still on login page)")

            logger.info(f"Login likely successful. URL: {page.url}")

            token = page.evaluate(
                """() => {
                    return localStorage.getItem('token');
                }"""
            )

            if not token:
                raise TokenRetrievalError("Login succeeded but no token found")

            return token

        except TokenRetrievalError:
            # Our own, already descriptive failures: do not re-wrap them as
            # "Unexpected error".
            raise

        except PlaywrightTimeoutError as e:
            raise TokenRetrievalError(f"Timeout during login flow: {e}") from e

        except Exception as e:
            raise TokenRetrievalError(f"Unexpected error: {e}") from e

        finally:
            browser.close()
import os

# SECURITY: the bearer token (a signed JWT) used to be hard-coded here. A
# credential committed to version control stays in the history, so any token
# that was committed should be treated as leaked. The value is now read from
# the PASHUB_TOKEN environment variable; when it is unset the token is an
# empty string.
TOKEN = os.environ.get("PASHUB_TOKEN", "")
base = "https://pashub.net/api"