From 8a17ea7265aad5d542782688872525cf30058542 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Mar 2026 15:46:31 +0000 Subject: [PATCH 1/8] set up docker and handler --- backend/pashub_fetcher/handler/Dockerfile | 16 ++++++++++++++++ backend/pashub_fetcher/handler/handler.py | 10 ++++++++++ backend/pashub_fetcher/processor.py | 0 3 files changed, 26 insertions(+) create mode 100644 backend/pashub_fetcher/handler/Dockerfile create mode 100644 backend/pashub_fetcher/handler/handler.py create mode 100644 backend/pashub_fetcher/processor.py diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile new file mode 100644 index 00000000..a67304ad --- /dev/null +++ b/backend/pashub_fetcher/handler/Dockerfile @@ -0,0 +1,16 @@ +FROM mcr.microsoft.com/playwright/python:v1.42.0 + +# Set working directory (Lambda task root) +WORKDIR /var/task + +COPY backend/.env.test backend/.env + +COPY utils/ utils/ +COPY backend/pashub_fetcher/ backend/pashub_fetcher/ + +# ----------------------------- +# Lambda handler +# ----------------------------- +# CMD ["backend/pashub_fetcher/handler/handler.handler"] +# For local running +CMD ["python", "-m", "backend.pashub_fetcher.handler.handler"] \ No newline at end of file diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py new file mode 100644 index 00000000..8fd7e175 --- /dev/null +++ b/backend/pashub_fetcher/handler/handler.py @@ -0,0 +1,10 @@ +from typing import Any, Mapping + +from utils.logger import setup_logger + + +logger = setup_logger() + + +def handler(event: Mapping[str, Any], context: Any) -> None: + logger.info("Recevied message") diff --git a/backend/pashub_fetcher/processor.py b/backend/pashub_fetcher/processor.py new file mode 100644 index 00000000..e69de29b From 142024550e1d0fcea3632b48362702f6918ba9a4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Mar 2026 16:22:53 +0000 Subject: [PATCH 2/8] set up local lambda runner and invoker --- backend/pashub_fetcher/handler/Dockerfile | 13 +++++++++- backend/pashub_fetcher/handler/handler.py | 2 +- .../local_handler/docker-compose.yml} | 6 ++--- .../local_handler/invoke_local_lambda.py | 26 +++++++++++++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) rename backend/{docker-compose-local-lambdas.yml => pashub_fetcher/local_handler/docker-compose.yml} (50%) create mode 100644 backend/pashub_fetcher/local_handler/invoke_local_lambda.py diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile index a67304ad..1534afdb 100644 --- a/backend/pashub_fetcher/handler/Dockerfile +++ b/backend/pashub_fetcher/handler/Dockerfile @@ -1,5 +1,12 @@ FROM mcr.microsoft.com/playwright/python:v1.42.0 +# Install AWS Lambda RIE +ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/local/bin/aws-lambda-rie +RUN chmod +x /usr/local/bin/aws-lambda-rie + +# Install Lambda runtime client +RUN pip install awslambdaric + # Set working directory (Lambda task root) WORKDIR /var/task @@ -8,9 +15,13 @@ COPY backend/.env.test backend/.env COPY utils/ utils/ COPY backend/pashub_fetcher/ backend/pashub_fetcher/ + +# Lambda entrypoint +ENTRYPOINT ["/usr/local/bin/aws-lambda-rie", "python", "-m", "awslambdaric"] + # ----------------------------- # Lambda handler # ----------------------------- # CMD ["backend/pashub_fetcher/handler/handler.handler"] # For local running -CMD ["python", "-m", "backend.pashub_fetcher.handler.handler"] \ No newline at end of file +CMD ["backend.pashub_fetcher.handler.handler.handler"] \ No newline at end of file diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 8fd7e175..00fbd6b5 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -7,4 +7,4 @@ logger = setup_logger() def handler(event: Mapping[str, Any], context: Any) -> None: - logger.info("Recevied message") + logger.info("Received message") diff --git a/backend/docker-compose-local-lambdas.yml b/backend/pashub_fetcher/local_handler/docker-compose.yml similarity index 50% rename from backend/docker-compose-local-lambdas.yml rename to backend/pashub_fetcher/local_handler/docker-compose.yml index 50e9193b..0ee53283 100644 --- a/backend/docker-compose-local-lambdas.yml +++ b/backend/pashub_fetcher/local_handler/docker-compose.yml @@ -3,9 +3,9 @@ version: "3.9" services: categorisation-lambda: build: - context: ../ - dockerfile: backend/categorisation/handler/Dockerfile + context: ../../../ + dockerfile: backend/pashub_fetcher/handler/Dockerfile ports: - "9000:8080" env_file: - - ../.env \ No newline at end of file + - ../../../.env \ No newline at end of file diff --git a/backend/pashub_fetcher/local_handler/invoke_local_lambda.py b/backend/pashub_fetcher/local_handler/invoke_local_lambda.py new file mode 100644 index 00000000..463ef9d8 --- /dev/null +++ b/backend/pashub_fetcher/local_handler/invoke_local_lambda.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +import json +import requests + +HOST = "localhost" +PORT = "9000" + +LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations" + +payload = { + "Records": [ + { + "body": json.dumps( + { + "uprn": 123456, + } + ) + } + ] +} + +response = requests.post(LAMBDA_URL, json=payload) + +print("Status code:", response.status_code) +print("Response:") +print(response.text) From a146c7d82e25bee4a75e8fc770596bedc159ae09 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Mar 2026 16:45:53 +0000 Subject: [PATCH 3/8] go to pas hub with playwright --- .devcontainer/backend/requirements.txt | 1 + backend/pashub_fetcher/handler/Dockerfile | 4 ++-- backend/pashub_fetcher/handler/handler.py | 12 +++++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt index 5cd40ced..d7afa6a2 100644 --- a/.devcontainer/backend/requirements.txt +++ b/.devcontainer/backend/requirements.txt @@ -6,6 +6,7 @@ psycopg2-binary==2.9.10 python-jose==3.3.0 cryptography==43.0.3 mangum==0.19.0 +playwright==1.58.0 # AWS boto3==1.35.44 # Data diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile index 1534afdb..2128d12c 100644 --- a/backend/pashub_fetcher/handler/Dockerfile +++ b/backend/pashub_fetcher/handler/Dockerfile @@ -1,11 +1,11 @@ -FROM mcr.microsoft.com/playwright/python:v1.42.0 +FROM mcr.microsoft.com/playwright/python:v1.58.0-jammy # Install AWS Lambda RIE ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/local/bin/aws-lambda-rie RUN chmod +x /usr/local/bin/aws-lambda-rie # Install Lambda runtime client -RUN pip install awslambdaric +RUN pip install awslambdaric playwright==1.58.0 # Set working directory (Lambda task root) WORKDIR /var/task diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 00fbd6b5..2fe2af1d 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,5 +1,7 @@ from typing import Any, Mapping +from playwright.sync_api import sync_playwright + from utils.logger import setup_logger @@ -7,4 +9,12 @@ logger = setup_logger() def handler(event: Mapping[str, Any], context: Any) -> None: - logger.info("Received message") + logger.info("Starting Playwright flow") + + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + + page.goto("https://pashub.net/") + + logger.info(f"Page title: {page.title()}") From b156617a2b1248bff533ad5acd0ae11beacba5c8 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Mar 2026 17:04:49 +0000 Subject: [PATCH 4/8] try logging into pas hub --- backend/pashub_fetcher/handler/handler.py | 46 ++++++++++++++++--- .../local_handler/docker-compose.yml | 2 +- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 2fe2af1d..3d22ef44 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,6 +1,6 @@ -from typing import Any, Mapping +from typing import Any, Dict, Mapping -from playwright.sync_api import sync_playwright +from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError from utils.logger import setup_logger @@ -8,13 +8,47 @@ from utils.logger import setup_logger logger = setup_logger() -def handler(event: Mapping[str, Any], context: Any) -> None: +def handler(event: Mapping[str, Any], context: Any) -> Dict[str, str]: logger.info("Starting Playwright flow") + email = "random@test.com" + password = "my_fake_password" + with sync_playwright() as p: - browser = p.chromium.launch(headless=True) + browser = p.chromium.launch( + headless=True, + args=["--no-sandbox", "--disable-dev-shm-usage"], + ) page = browser.new_page() - page.goto("https://pashub.net/") + try: + logger.info("Navigating to site...") + page.goto("https://pashub.net/", timeout=30000) - logger.info(f"Page title: {page.title()}") + logger.info("Filling login form...") + page.fill("#email", email) + page.fill("#password", password) + + logger.info("Submitting login...") + page.click("#btn-login") + + page.wait_for_timeout(3000) + + if "login" in page.url.lower(): + logger.error("Login failed (still on login page)") + return {"status": "error", "message": "Login failed"} + + logger.info(f"Login likely successful. URL: {page.url}") + + return {"status": "ok"} + + except PlaywrightTimeoutError as e: + logger.error(f"Timeout during login flow: {str(e)}") + return {"status": "error", "message": "Timeout during login"} + + except Exception as e: + logger.error(f"Unexpected error: {str(e)}") + return {"status": "error", "message": str(e)} + + finally: + browser.close() diff --git a/backend/pashub_fetcher/local_handler/docker-compose.yml b/backend/pashub_fetcher/local_handler/docker-compose.yml index 0ee53283..34ba9277 100644 --- a/backend/pashub_fetcher/local_handler/docker-compose.yml +++ b/backend/pashub_fetcher/local_handler/docker-compose.yml @@ -1,7 +1,7 @@ version: "3.9" services: - categorisation-lambda: + pashub-fetcher-lambda: build: context: ../../../ dockerfile: backend/pashub_fetcher/handler/Dockerfile From 4d641af0c135b11973dc29c118fabd715f205ea9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Mar 2026 09:36:57 +0000 Subject: [PATCH 5/8] extract token from localStorage after logging in --- backend/pashub_fetcher/handler/handler.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 3d22ef44..a5cbe55a 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -40,7 +40,19 @@ def handler(event: Mapping[str, Any], context: Any) -> Dict[str, str]: logger.info(f"Login likely successful. URL: {page.url}") - return {"status": "ok"} + token = page.evaluate( + """() => { + return localStorage.getItem('token'); + }""" + ) + + if not token: + logger.error("Login succeeded but no token found") + return {"status": "error", "message": "No token found"} + + logger.info(f"Token extracted successfully: {token}") + + return {"status": "ok", "token": token} except PlaywrightTimeoutError as e: logger.error(f"Timeout during login flow: {str(e)}") From b8c0c9ea653df2818aa75bf41af32e2783b39a5a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Mar 2026 09:56:53 +0000 Subject: [PATCH 6/8] move playwright process to separate file --- backend/pashub_fetcher/handler/handler.py | 69 ++++------------------- backend/pashub_fetcher/processor.py | 0 backend/pashub_fetcher/token_getter.py | 54 ++++++++++++++++++ 3 files changed, 65 insertions(+), 58 deletions(-) delete mode 100644 backend/pashub_fetcher/processor.py create mode 100644 backend/pashub_fetcher/token_getter.py diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index a5cbe55a..11f457a6 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,66 +1,19 @@ -from typing import Any, Dict, Mapping - -from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError +from typing import Any, Mapping +from backend.pashub_fetcher.token_getter import get_token_from_local_storage from utils.logger import setup_logger logger = setup_logger() -def handler(event: Mapping[str, Any], context: Any) -> Dict[str, str]: - logger.info("Starting Playwright flow") +def handler(event: Mapping[str, Any], context: Any) -> None: + pas_hub_email = "random@test.com" + pas_hub_password = "my_fake_password" - email = "random@test.com" - password = "my_fake_password" - - with sync_playwright() as p: - browser = p.chromium.launch( - headless=True, - args=["--no-sandbox", "--disable-dev-shm-usage"], - ) - page = browser.new_page() - - try: - logger.info("Navigating to site...") - page.goto("https://pashub.net/", timeout=30000) - - logger.info("Filling login form...") - page.fill("#email", email) - page.fill("#password", password) - - logger.info("Submitting login...") - page.click("#btn-login") - - page.wait_for_timeout(3000) - - if "login" in page.url.lower(): - logger.error("Login failed (still on login page)") - return {"status": "error", "message": "Login failed"} - - logger.info(f"Login likely successful. URL: {page.url}") - - token = page.evaluate( - """() => { - return localStorage.getItem('token'); - }""" - ) - - if not token: - logger.error("Login succeeded but no token found") - return {"status": "error", "message": "No token found"} - - logger.info(f"Token extracted successfully: {token}") - - return {"status": "ok", "token": token} - - except PlaywrightTimeoutError as e: - logger.error(f"Timeout during login flow: {str(e)}") - return {"status": "error", "message": "Timeout during login"} - - except Exception as e: - logger.error(f"Unexpected error: {str(e)}") - return {"status": "error", "message": str(e)} - - finally: - browser.close() + try: + token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password) + logger.info(f"Token extracted successfully: {token}") + except: + logger.error("Error getting auth token from Pas Hub") + raise diff --git a/backend/pashub_fetcher/processor.py b/backend/pashub_fetcher/processor.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/pashub_fetcher/token_getter.py b/backend/pashub_fetcher/token_getter.py new file mode 100644 index 00000000..d5481dd5 --- /dev/null +++ b/backend/pashub_fetcher/token_getter.py @@ -0,0 +1,54 @@ +from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError + +from utils.logger import setup_logger + +logger = setup_logger() + + +def get_token_from_local_storage(email: str, password: str) -> str: + logger.info("Starting Playwright flow") + + with sync_playwright() as p: + browser = p.chromium.launch( + headless=True, + args=["--no-sandbox", "--disable-dev-shm-usage"], + ) + page = browser.new_page() + + try: + logger.info("Navigating to site...") + page.goto("https://pashub.net/", timeout=30000) + + logger.info("Filling login form...") + page.fill("#email", email) + page.fill("#password", password) + + logger.info("Submitting login...") + page.click("#btn-login") + + page.wait_for_timeout(3000) + + if "login" in page.url.lower(): + raise Exception("Login failed (still on login page)") + + logger.info(f"Login likely successful. URL: {page.url}") + + token = page.evaluate( + """() => { + return localStorage.getItem('token'); + }""" + ) + + if not token: + raise Exception("Login succeeded but no token found") + + return token + + except PlaywrightTimeoutError as e: + raise Exception(f"Timeout during login flow: {str(e)}") + + except Exception as e: + raise Exception(f"Unexpected error: {str(e)}") + + finally: + browser.close() From cd514b6e5d630e4a9a665eee54b8a3994067c061 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Mar 2026 10:29:42 +0000 Subject: [PATCH 7/8] add and implement cotality_client --- backend/pashub_fetcher/handler/handler.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 11f457a6..a9288783 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,5 +1,6 @@ -from typing import Any, Mapping +from typing import Any, List, Mapping +from backend.pashub_fetcher.cotality_client import CotalityClient from backend.pashub_fetcher.token_getter import get_token_from_local_storage from utils.logger import setup_logger @@ -17,3 +18,9 @@ def handler(event: Mapping[str, Any], context: Any) -> None: except: logger.error("Error getting auth token from Pas Hub") raise + + client = CotalityClient(token=token) + uprn = "100061885568" # TODO: get from request body + + saved_files: List[str] = client.get_evidence_files(uprn=uprn) + print(f"saved {len(saved_files)} files") From 3dc14480e9dd7f41a175e5c9f511f979afc09703 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Mar 2026 10:31:30 +0000 Subject: [PATCH 8/8] rename evidence file getter method --- backend/pashub_fetcher/handler/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index a9288783..b5ec4320 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -22,5 +22,5 @@ def handler(event: Mapping[str, Any], context: Any) -> None: client = CotalityClient(token=token) uprn = "100061885568" # TODO: get from request body - saved_files: List[str] = client.get_evidence_files(uprn=uprn) + saved_files: List[str] = client.get_evidence_files_by_uprn(uprn=uprn) print(f"saved {len(saved_files)} files")