diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt index 5cd40ced..d7afa6a2 100644 --- a/.devcontainer/backend/requirements.txt +++ b/.devcontainer/backend/requirements.txt @@ -6,6 +6,7 @@ psycopg2-binary==2.9.10 python-jose==3.3.0 cryptography==43.0.3 mangum==0.19.0 +playwright==1.58.0 # AWS boto3==1.35.44 # Data diff --git a/backend/docker-compose-local-lambdas.yml b/backend/docker-compose-local-lambdas.yml deleted file mode 100644 index 50e9193b..00000000 --- a/backend/docker-compose-local-lambdas.yml +++ /dev/null @@ -1,11 +0,0 @@ -version: "3.9" - -services: - categorisation-lambda: - build: - context: ../ - dockerfile: backend/categorisation/handler/Dockerfile - ports: - - "9000:8080" - env_file: - - ../.env \ No newline at end of file diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile new file mode 100644 index 00000000..2128d12c --- /dev/null +++ b/backend/pashub_fetcher/handler/Dockerfile @@ -0,0 +1,27 @@ +FROM mcr.microsoft.com/playwright/python:v1.58.0-jammy + +# Install AWS Lambda RIE +ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/local/bin/aws-lambda-rie +RUN chmod +x /usr/local/bin/aws-lambda-rie + +# Install Lambda runtime client +RUN pip install awslambdaric playwright==1.58.0 + +# Set working directory (Lambda task root) +WORKDIR /var/task + +COPY backend/.env.test backend/.env + +COPY utils/ utils/ +COPY backend/pashub_fetcher/ backend/pashub_fetcher/ + + +# Lambda entrypoint +ENTRYPOINT ["/usr/local/bin/aws-lambda-rie", "python", "-m", "awslambdaric"] + +# ----------------------------- +# Lambda handler +# ----------------------------- +# CMD ["backend/pashub_fetcher/handler/handler.handler"] +# For local running +CMD ["backend.pashub_fetcher.handler.handler.handler"] \ No newline at end of file diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py 
# Fallback UPRN used when the incoming event carries none (the value the
# handler was originally hard-coded to fetch).
_DEFAULT_UPRN = "100061885568"


def _uprns_from_event(event: Mapping[str, Any], default: str) -> List[str]:
    """Collect the UPRNs found in the ``Records[*].body`` payloads of *event*.

    Each record body may be a JSON string (as sent by the local invoke
    script) or an already-decoded mapping. Records without a usable
    ``uprn`` are ignored. When no UPRN is found at all, ``[default]`` is
    returned so an event without a payload behaves as before.
    """
    import json  # function-scope import: only this helper needs it

    uprns: List[str] = []
    records = event.get("Records") if isinstance(event, Mapping) else None
    for record in records or []:
        body = record.get("body") if isinstance(record, Mapping) else None
        if isinstance(body, (str, bytes, bytearray)):
            try:
                body = json.loads(body)
            except ValueError:
                # JSONDecodeError and UnicodeDecodeError both derive from
                # ValueError; one bad record must not abort the whole batch.
                logger.error("Ignoring record whose body is not valid JSON")
                continue
        uprn = body.get("uprn") if isinstance(body, Mapping) else None
        if uprn is None:
            continue
        # The invoke script sends the UPRN as an integer; normalise to str.
        uprn_text = str(uprn).strip()
        if uprn_text:
            uprns.append(uprn_text)
    return uprns or [default]


def handler(event: Mapping[str, Any], context: Any) -> None:
    """Lambda entry point: log in to Pas Hub and fetch evidence files.

    Args:
        event: Invocation payload. UPRNs are read from the JSON ``body`` of
            each entry in ``event["Records"]``; if none is present the
            built-in default UPRN is used.
        context: Lambda context object (unused).

    Raises:
        Exception: Whatever ``get_token_from_local_storage`` raises is logged
            and re-raised unchanged.
    """
    # NOTE(review): these are placeholder credentials; real ones should come
    # from configuration/secrets rather than source code.
    pas_hub_email = "random@test.com"
    pas_hub_password = "my_fake_password"

    try:
        token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password)
    except Exception:
        logger.error("Error getting auth token from Pas Hub")
        raise
    # Never write the token itself to the logs: it is a bearer credential.
    logger.info("Token extracted successfully")

    client = CotalityClient(token=token)

    for uprn in _uprns_from_event(event, default=_DEFAULT_UPRN):
        saved_files: List[str] = client.get_evidence_files_by_uprn(uprn=uprn)
        logger.info(f"saved {len(saved_files)} files for UPRN {uprn}")
def get_token_from_local_storage(
    email: str,
    password: str,
    *,
    login_url: str = "https://pashub.net/",
    navigation_timeout_ms: int = 30000,
    post_login_wait_ms: int = 3000,
) -> str:
    """Log in through a headless Chromium session and return the auth token.

    The login form is filled and submitted, the page is given
    ``post_login_wait_ms`` to settle, and the ``token`` entry is then read
    from the page's ``localStorage``.

    Args:
        email: Value typed into the ``#email`` field.
        password: Value typed into the ``#password`` field.
        login_url: Page that hosts the login form.
        navigation_timeout_ms: Timeout for the initial navigation, in ms.
        post_login_wait_ms: Fixed pause after clicking the login button, in ms.

    Returns:
        The non-empty value stored under ``localStorage['token']``.

    Raises:
        RuntimeError: If a Playwright timeout or any other error occurs while
            driving the browser, if the URL still contains ``login`` after
            submitting, or if no token is present in local storage.
    """
    logger.info("Starting Playwright flow")

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )
        try:
            # Created inside the try so the browser is closed even if opening
            # the page fails.
            page = browser.new_page()

            logger.info("Navigating to site...")
            page.goto(login_url, timeout=navigation_timeout_ms)

            logger.info("Filling login form...")
            page.fill("#email", email)
            page.fill("#password", password)

            logger.info("Submitting login...")
            page.click("#btn-login")

            # NOTE(review): a fixed pause is fragile; waiting on a concrete
            # post-login condition would be more reliable once one is known.
            page.wait_for_timeout(post_login_wait_ms)

            # Capture what is needed while the browser is still open; the
            # outcome is validated after the browser-error handlers so that
            # login failures are not relabelled as "Unexpected error".
            final_url = page.url
            token = page.evaluate(
                """() => {
                    return localStorage.getItem('token');
                }"""
            )
        except PlaywrightTimeoutError as e:
            raise RuntimeError(f"Timeout during login flow: {e}") from e
        except Exception as e:
            raise RuntimeError(f"Unexpected error: {e}") from e
        finally:
            browser.close()

    if "login" in final_url.lower():
        raise RuntimeError("Login failed (still on login page)")

    logger.info(f"Login likely successful. URL: {final_url}")

    if not token:
        raise RuntimeError("Login succeeded but no token found")

    return token